提交 4a386c3e 编写于 作者: L Linus Torvalds

Merge branch 'x86-xsave-for-linus' of...

Merge branch 'x86-xsave-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'x86-xsave-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  x86, xsave: Make xstate_enable_boot_cpu() __init, protect on CPU 0
  x86, xsave: Add __init attribute to setup_xstate_features()
  x86, xsave: Make init_xstate_buf static
  x86, xsave: Check cpuid level for XSTATE_CPUID (0x0d)
  x86, xsave: Introduce xstate enable functions
  x86, xsave: Separate fpu and xsave initialization
  x86, xsave: Move boot cpu initialization to xsave_init()
  x86, xsave: 32/64 bit boot cpu check unification in initialization
  x86, xsave: Do not include asm/i387.h in asm/xsave.h
  x86, xsave: Use xsaveopt in context-switch path when supported
  x86, xsave: Sync xsave memory layout with its header for user handling
  x86, xsave: Track the offset, size of state in the xsave layout
...@@ -31,7 +31,6 @@ extern void mxcsr_feature_mask_init(void); ...@@ -31,7 +31,6 @@ extern void mxcsr_feature_mask_init(void);
extern int init_fpu(struct task_struct *child); extern int init_fpu(struct task_struct *child);
extern asmlinkage void math_state_restore(void); extern asmlinkage void math_state_restore(void);
extern void __math_state_restore(void); extern void __math_state_restore(void);
extern void init_thread_xstate(void);
extern int dump_fpu(struct pt_regs *, struct user_i387_struct *); extern int dump_fpu(struct pt_regs *, struct user_i387_struct *);
extern user_regset_active_fn fpregs_active, xfpregs_active; extern user_regset_active_fn fpregs_active, xfpregs_active;
...@@ -58,11 +57,25 @@ extern int restore_i387_xstate_ia32(void __user *buf); ...@@ -58,11 +57,25 @@ extern int restore_i387_xstate_ia32(void __user *buf);
#define X87_FSW_ES (1 << 7) /* Exception Summary */ #define X87_FSW_ES (1 << 7) /* Exception Summary */
/*
 * Report whether the X86_FEATURE_XSAVEOPT capability is set, as tested by
 * static_cpu_has().
 */
static __always_inline __pure bool use_xsaveopt(void)
{
return static_cpu_has(X86_FEATURE_XSAVEOPT);
}
static __always_inline __pure bool use_xsave(void) static __always_inline __pure bool use_xsave(void)
{ {
return static_cpu_has(X86_FEATURE_XSAVE); return static_cpu_has(X86_FEATURE_XSAVE);
} }
/* Out-of-line worker invoked by sanitize_i387_state() below. */
extern void __sanitize_i387_state(struct task_struct *);

/*
 * Inline front end for __sanitize_i387_state(): the worker is called for
 * @tsk only when use_xsaveopt() reports true; otherwise this does nothing.
 */
static inline void sanitize_i387_state(struct task_struct *tsk)
{
	if (use_xsaveopt())
		__sanitize_i387_state(tsk);
}
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
/* Ignore delayed exceptions from user space */ /* Ignore delayed exceptions from user space */
......
...@@ -3,7 +3,8 @@ ...@@ -3,7 +3,8 @@
#include <linux/types.h> #include <linux/types.h>
#include <asm/processor.h> #include <asm/processor.h>
#include <asm/i387.h>
#define XSTATE_CPUID 0x0000000d
#define XSTATE_FP 0x1 #define XSTATE_FP 0x1
#define XSTATE_SSE 0x2 #define XSTATE_SSE 0x2
...@@ -32,10 +33,8 @@ ...@@ -32,10 +33,8 @@
extern unsigned int xstate_size; extern unsigned int xstate_size;
extern u64 pcntxt_mask; extern u64 pcntxt_mask;
extern struct xsave_struct *init_xstate_buf;
extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS]; extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS];
extern void xsave_cntxt_init(void);
extern void xsave_init(void); extern void xsave_init(void);
extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask); extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask);
extern int init_fpu(struct task_struct *child); extern int init_fpu(struct task_struct *child);
...@@ -127,12 +126,25 @@ static inline void xrstor_state(struct xsave_struct *fx, u64 mask) ...@@ -127,12 +126,25 @@ static inline void xrstor_state(struct xsave_struct *fx, u64 mask)
: "memory"); : "memory");
} }
/*
 * Emit the instruction bytes 0x0f,0xae,0x27 (with REX_PREFIX in front) on
 * the buffer @fx.
 *
 * @fx:   buffer the instruction operates on; passed in the "D" register.
 * @mask: 64-bit feature mask, split into its low half ("a" register) and
 *        high half ("d" register).
 *
 * NOTE(review): 0F AE /4 with a (%edi)/(%rdi) operand is the XSAVE
 * encoding, i.e. this should store the state components selected by @mask
 * into *@fx -- confirm against the instruction reference.
 */
static inline void xsave_state(struct xsave_struct *fx, u64 mask)
{
/* Low and high 32 bits of the mask, one per register operand. */
u32 lmask = mask;
u32 hmask = mask >> 32;
/*
 * The "m" (*fx) operand and the "memory" clobber tell the compiler that
 * the buffer is accessed by the hand-encoded instruction.
 */
asm volatile(".byte " REX_PREFIX "0x0f,0xae,0x27\n\t"
: : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
: "memory");
}
static inline void fpu_xsave(struct fpu *fpu) static inline void fpu_xsave(struct fpu *fpu)
{ {
/* This, however, we can work around by forcing the compiler to select /* This, however, we can work around by forcing the compiler to select
an addressing mode that doesn't require extended registers. */ an addressing mode that doesn't require extended registers. */
__asm__ __volatile__(".byte " REX_PREFIX "0x0f,0xae,0x27" alternative_input(
: : "D" (&(fpu->state->xsave)), ".byte " REX_PREFIX "0x0f,0xae,0x27",
"a" (-1), "d"(-1) : "memory"); ".byte " REX_PREFIX "0x0f,0xae,0x37",
X86_FEATURE_XSAVEOPT,
[fx] "D" (&fpu->state->xsave), "a" (-1), "d" (-1) :
"memory");
} }
#endif #endif
...@@ -140,10 +140,18 @@ EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); ...@@ -140,10 +140,18 @@ EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
static int __init x86_xsave_setup(char *s) static int __init x86_xsave_setup(char *s)
{ {
setup_clear_cpu_cap(X86_FEATURE_XSAVE); setup_clear_cpu_cap(X86_FEATURE_XSAVE);
setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
return 1; return 1;
} }
__setup("noxsave", x86_xsave_setup); __setup("noxsave", x86_xsave_setup);
/*
 * Handler registered for the "noxsaveopt" option: clears the
 * X86_FEATURE_XSAVEOPT capability via setup_clear_cpu_cap().
 * The option string @s is not examined. Always returns 1.
 */
static int __init x86_xsaveopt_setup(char *s)
{
setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
return 1;
}
__setup("noxsaveopt", x86_xsaveopt_setup);
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32
static int cachesize_override __cpuinitdata = -1; static int cachesize_override __cpuinitdata = -1;
static int disable_x86_serial_nr __cpuinitdata = 1; static int disable_x86_serial_nr __cpuinitdata = 1;
...@@ -1202,6 +1210,7 @@ void __cpuinit cpu_init(void) ...@@ -1202,6 +1210,7 @@ void __cpuinit cpu_init(void)
dbg_restore_debug_regs(); dbg_restore_debug_regs();
fpu_init(); fpu_init();
xsave_init();
raw_local_save_flags(kernel_eflags); raw_local_save_flags(kernel_eflags);
...@@ -1262,12 +1271,7 @@ void __cpuinit cpu_init(void) ...@@ -1262,12 +1271,7 @@ void __cpuinit cpu_init(void)
clear_used_math(); clear_used_math();
mxcsr_feature_mask_init(); mxcsr_feature_mask_init();
/* fpu_init();
* Boot processor to setup the FP and extended state context info.
*/
if (smp_processor_id() == boot_cpu_id)
init_thread_xstate();
xsave_init(); xsave_init();
} }
#endif #endif
...@@ -59,18 +59,18 @@ void __cpuinit mxcsr_feature_mask_init(void) ...@@ -59,18 +59,18 @@ void __cpuinit mxcsr_feature_mask_init(void)
stts(); stts();
} }
void __cpuinit init_thread_xstate(void) static void __cpuinit init_thread_xstate(void)
{ {
/*
* Note that xstate_size might be overwritten later during
* xsave_init().
*/
if (!HAVE_HWFP) { if (!HAVE_HWFP) {
xstate_size = sizeof(struct i387_soft_struct); xstate_size = sizeof(struct i387_soft_struct);
return; return;
} }
if (cpu_has_xsave) {
xsave_cntxt_init();
return;
}
if (cpu_has_fxsr) if (cpu_has_fxsr)
xstate_size = sizeof(struct i387_fxsave_struct); xstate_size = sizeof(struct i387_fxsave_struct);
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32
...@@ -84,6 +84,7 @@ void __cpuinit init_thread_xstate(void) ...@@ -84,6 +84,7 @@ void __cpuinit init_thread_xstate(void)
* Called at bootup to set up the initial FPU state that is later cloned * Called at bootup to set up the initial FPU state that is later cloned
* into all processes. * into all processes.
*/ */
void __cpuinit fpu_init(void) void __cpuinit fpu_init(void)
{ {
unsigned long oldcr0 = read_cr0(); unsigned long oldcr0 = read_cr0();
...@@ -93,19 +94,24 @@ void __cpuinit fpu_init(void) ...@@ -93,19 +94,24 @@ void __cpuinit fpu_init(void)
write_cr0(oldcr0 & ~(X86_CR0_TS|X86_CR0_EM)); /* clear TS and EM */ write_cr0(oldcr0 & ~(X86_CR0_TS|X86_CR0_EM)); /* clear TS and EM */
/*
* Boot processor to setup the FP and extended state context info.
*/
if (!smp_processor_id()) if (!smp_processor_id())
init_thread_xstate(); init_thread_xstate();
xsave_init();
mxcsr_feature_mask_init(); mxcsr_feature_mask_init();
/* clean state in init */ /* clean state in init */
current_thread_info()->status = 0; current_thread_info()->status = 0;
clear_used_math(); clear_used_math();
} }
#endif /* CONFIG_X86_64 */
#else /* CONFIG_X86_64 */
/*
 * fpu_init() variant on the #else side of the CONFIG_X86_64 conditional:
 * it only calls init_thread_xstate(), and only on the CPU for which
 * smp_processor_id() returns 0.
 */
void __cpuinit fpu_init(void)
{
if (!smp_processor_id())
init_thread_xstate();
}
#endif /* CONFIG_X86_32 */
void fpu_finit(struct fpu *fpu) void fpu_finit(struct fpu *fpu)
{ {
...@@ -191,6 +197,8 @@ int xfpregs_get(struct task_struct *target, const struct user_regset *regset, ...@@ -191,6 +197,8 @@ int xfpregs_get(struct task_struct *target, const struct user_regset *regset,
if (ret) if (ret)
return ret; return ret;
sanitize_i387_state(target);
return user_regset_copyout(&pos, &count, &kbuf, &ubuf, return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
&target->thread.fpu.state->fxsave, 0, -1); &target->thread.fpu.state->fxsave, 0, -1);
} }
...@@ -208,6 +216,8 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset, ...@@ -208,6 +216,8 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
if (ret) if (ret)
return ret; return ret;
sanitize_i387_state(target);
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
&target->thread.fpu.state->fxsave, 0, -1); &target->thread.fpu.state->fxsave, 0, -1);
...@@ -447,6 +457,8 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset, ...@@ -447,6 +457,8 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset,
-1); -1);
} }
sanitize_i387_state(target);
if (kbuf && pos == 0 && count == sizeof(env)) { if (kbuf && pos == 0 && count == sizeof(env)) {
convert_from_fxsr(kbuf, target); convert_from_fxsr(kbuf, target);
return 0; return 0;
...@@ -468,6 +480,8 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, ...@@ -468,6 +480,8 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset,
if (ret) if (ret)
return ret; return ret;
sanitize_i387_state(target);
if (!HAVE_HWFP) if (!HAVE_HWFP)
return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf); return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf);
...@@ -534,6 +548,9 @@ static int save_i387_xsave(void __user *buf) ...@@ -534,6 +548,9 @@ static int save_i387_xsave(void __user *buf)
struct _fpstate_ia32 __user *fx = buf; struct _fpstate_ia32 __user *fx = buf;
int err = 0; int err = 0;
sanitize_i387_state(tsk);
/* /*
* For legacy compatible, we always set FP/SSE bits in the bit * For legacy compatible, we always set FP/SSE bits in the bit
* vector while saving the state to the user context. * vector while saving the state to the user context.
......
...@@ -16,11 +16,88 @@ ...@@ -16,11 +16,88 @@
*/ */
u64 pcntxt_mask; u64 pcntxt_mask;
/*
* Represents init state for the supported extended state.
*/
static struct xsave_struct *init_xstate_buf;
struct _fpx_sw_bytes fx_sw_reserved; struct _fpx_sw_bytes fx_sw_reserved;
#ifdef CONFIG_IA32_EMULATION #ifdef CONFIG_IA32_EMULATION
struct _fpx_sw_bytes fx_sw_reserved_ia32; struct _fpx_sw_bytes fx_sw_reserved_ia32;
#endif #endif
static unsigned int *xstate_offsets, *xstate_sizes, xstate_features;
/*
 * With xsaveopt, if the processor discerns that a state component is in its
 * initialized state, it may clear the corresponding bit in
 * xsave_hdr.xstate_bv without modifying the corresponding memory layout.
 * While presenting the xstate information to the user, we always ensure that
 * the memory layout of a feature is in the init state if the corresponding
 * header bit is zero. This ensures that the user doesn't see stale state in
 * the memory layout during signal handling, debugging etc.
 *
 * @tsk: task whose saved xstate buffer (tsk->thread.fpu.state) is fixed up
 *       in place. Nothing is done if the task has no state buffer.
 */
void __sanitize_i387_state(struct task_struct *tsk)
{
	struct i387_fxsave_struct *fx;
	u64 xstate_bv;
	/*
	 * Features 0 (FP) and 1 (SSE) are handled explicitly below; the
	 * generic loop starts at feature 2, matching the ">> 2" applied to
	 * the pending-feature mask.
	 */
	int feature_bit = 0x2;

	/*
	 * Test the state pointer itself. Testing the address of a member
	 * reached through it would only work while that member sits at
	 * offset 0, and forms an address from a null pointer.
	 */
	if (!tsk->thread.fpu.state)
		return;
	fx = &tsk->thread.fpu.state->fxsave;

	/*
	 * The task must not be flagged TS_USEDFPU here.
	 * NOTE(review): presumably because the in-memory copy would not be
	 * current while the task still owns the FPU registers -- confirm.
	 */
	BUG_ON(task_thread_info(tsk)->status & TS_USEDFPU);

	xstate_bv = tsk->thread.fpu.state->xsave.xsave_hdr.xstate_bv;

	/*
	 * None of the feature bits are in init state. So nothing else
	 * to do for us, as the memory layout is up to date.
	 */
	if ((xstate_bv & pcntxt_mask) == pcntxt_mask)
		return;

	/*
	 * FP is in init state
	 */
	if (!(xstate_bv & XSTATE_FP)) {
		fx->cwd = 0x37f;
		fx->swd = 0;
		fx->twd = 0;
		fx->fop = 0;
		fx->rip = 0;
		fx->rdp = 0;
		memset(&fx->st_space[0], 0, 128);
	}

	/*
	 * SSE is in init state
	 */
	if (!(xstate_bv & XSTATE_SSE))
		memset(&fx->xmm_space[0], 0, 256);

	/*
	 * Supported features whose header bit is clear, with FP and SSE
	 * (bits 0 and 1) shifted out since they were handled above.
	 */
	xstate_bv = (pcntxt_mask & ~xstate_bv) >> 2;

	/*
	 * Update all the other memory layouts for which the corresponding
	 * header bit is in the init state, by copying the feature's region
	 * from init_xstate_buf.
	 */
	while (xstate_bv) {
		if (xstate_bv & 0x1) {
			unsigned int offset = xstate_offsets[feature_bit];
			unsigned int size = xstate_sizes[feature_bit];

			memcpy(((void *) fx) + offset,
			       ((void *) init_xstate_buf) + offset,
			       size);
		}

		xstate_bv >>= 1;
		feature_bit++;
	}
}
/* /*
* Check for the presence of extended state information in the * Check for the presence of extended state information in the
* user fpstate pointer in the sigcontext. * user fpstate pointer in the sigcontext.
...@@ -102,6 +179,7 @@ int save_i387_xstate(void __user *buf) ...@@ -102,6 +179,7 @@ int save_i387_xstate(void __user *buf)
task_thread_info(tsk)->status &= ~TS_USEDFPU; task_thread_info(tsk)->status &= ~TS_USEDFPU;
stts(); stts();
} else { } else {
sanitize_i387_state(tsk);
if (__copy_to_user(buf, &tsk->thread.fpu.state->fxsave, if (__copy_to_user(buf, &tsk->thread.fpu.state->fxsave,
xstate_size)) xstate_size))
return -1; return -1;
...@@ -267,11 +345,6 @@ static void prepare_fx_sw_frame(void) ...@@ -267,11 +345,6 @@ static void prepare_fx_sw_frame(void)
#endif #endif
} }
/*
* Represents init state for the supported extended state.
*/
struct xsave_struct *init_xstate_buf;
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
unsigned int sig_xstate_size = sizeof(struct _fpstate); unsigned int sig_xstate_size = sizeof(struct _fpstate);
#endif #endif
...@@ -279,18 +352,35 @@ unsigned int sig_xstate_size = sizeof(struct _fpstate); ...@@ -279,18 +352,35 @@ unsigned int sig_xstate_size = sizeof(struct _fpstate);
/* /*
* Enable the extended processor state save/restore feature * Enable the extended processor state save/restore feature
*/ */
void __cpuinit xsave_init(void) static inline void xstate_enable(void)
{ {
if (!cpu_has_xsave)
return;
set_in_cr4(X86_CR4_OSXSAVE); set_in_cr4(X86_CR4_OSXSAVE);
xsetbv(XCR_XFEATURE_ENABLED_MASK, pcntxt_mask);
}
/* /*
* Enable all the features that the HW is capable of * Record the offsets and sizes of different state managed by the xsave
* and the Linux kernel is aware of. * memory layout.
*/ */
xsetbv(XCR_XFEATURE_ENABLED_MASK, pcntxt_mask); static void __init setup_xstate_features(void)
{
int eax, ebx, ecx, edx, leaf = 0x2;
xstate_features = fls64(pcntxt_mask);
xstate_offsets = alloc_bootmem(xstate_features * sizeof(int));
xstate_sizes = alloc_bootmem(xstate_features * sizeof(int));
do {
cpuid_count(XSTATE_CPUID, leaf, &eax, &ebx, &ecx, &edx);
if (eax == 0)
break;
xstate_offsets[leaf] = ebx;
xstate_sizes[leaf] = eax;
leaf++;
} while (1);
} }
/* /*
...@@ -298,18 +388,41 @@ void __cpuinit xsave_init(void) ...@@ -298,18 +388,41 @@ void __cpuinit xsave_init(void)
*/ */
static void __init setup_xstate_init(void) static void __init setup_xstate_init(void)
{ {
setup_xstate_features();
/*
* Setup init_xstate_buf to represent the init state of
* all the features managed by the xsave
*/
init_xstate_buf = alloc_bootmem(xstate_size); init_xstate_buf = alloc_bootmem(xstate_size);
init_xstate_buf->i387.mxcsr = MXCSR_DEFAULT; init_xstate_buf->i387.mxcsr = MXCSR_DEFAULT;
clts();
/*
* Init all the features state with header_bv being 0x0
*/
xrstor_state(init_xstate_buf, -1);
/*
* Dump the init state again. This is to identify the init state
* of any feature which is not represented by all zeros.
*/
xsave_state(init_xstate_buf, -1);
stts();
} }
/* /*
* Enable and initialize the xsave feature. * Enable and initialize the xsave feature.
*/ */
void __ref xsave_cntxt_init(void) static void __init xstate_enable_boot_cpu(void)
{ {
unsigned int eax, ebx, ecx, edx; unsigned int eax, ebx, ecx, edx;
cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx); if (boot_cpu_data.cpuid_level < XSTATE_CPUID) {
WARN(1, KERN_ERR "XSTATE_CPUID missing\n");
return;
}
cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
pcntxt_mask = eax + ((u64)edx << 32); pcntxt_mask = eax + ((u64)edx << 32);
if ((pcntxt_mask & XSTATE_FPSSE) != XSTATE_FPSSE) { if ((pcntxt_mask & XSTATE_FPSSE) != XSTATE_FPSSE) {
...@@ -322,12 +435,13 @@ void __ref xsave_cntxt_init(void) ...@@ -322,12 +435,13 @@ void __ref xsave_cntxt_init(void)
* Support only the state known to OS. * Support only the state known to OS.
*/ */
pcntxt_mask = pcntxt_mask & XCNTXT_MASK; pcntxt_mask = pcntxt_mask & XCNTXT_MASK;
xsave_init();
xstate_enable();
/* /*
* Recompute the context size for enabled features * Recompute the context size for enabled features
*/ */
cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx); cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
xstate_size = ebx; xstate_size = ebx;
update_regset_xstate_info(xstate_size, pcntxt_mask); update_regset_xstate_info(xstate_size, pcntxt_mask);
...@@ -339,3 +453,23 @@ void __ref xsave_cntxt_init(void) ...@@ -339,3 +453,23 @@ void __ref xsave_cntxt_init(void)
"cntxt size 0x%x\n", "cntxt size 0x%x\n",
pcntxt_mask, xstate_size); pcntxt_mask, xstate_size);
} }
/*
 * For the very first call that finds cpu_has_xsave set, this calls
 * xstate_enable_boot_cpu(); for all subsequent such calls, this calls
 * xstate_enable(). Calls made while cpu_has_xsave is false return without
 * consuming the first-call slot.
 *
 * This is somewhat obfuscated due to the lack of powerful enough
 * overrides for the section checks.
 */
void __cpuinit xsave_init(void)
{
/* Function to run on the next call; starts out as the boot-CPU variant. */
static __refdata void (*next_func)(void) = xstate_enable_boot_cpu;
void (*this_func)(void);
/* Nothing to enable when the xsave feature is not available. */
if (!cpu_has_xsave)
return;
/*
 * Pick up the pending function and switch all later calls over to
 * xstate_enable() before invoking it.
 */
this_func = next_func;
next_func = xstate_enable;
this_func();
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册