diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index a8eb6afce6a412a80ce576911bc17b20294891d6..5ae8608ca9f58a6331cae454ae7a0c84ab2f6c47 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2200,6 +2200,21 @@ bytes respectively. Such letter suffixes can also be entirely omitted. and restore using xsave. The kernel will fallback to enabling legacy floating-point and sse state. + noxsaveopt [X86] Disables xsaveopt used in saving x86 extended + register states. The kernel will fall back to use + xsave to save the states. By using this parameter, + performance of saving the states is degraded because + xsave doesn't support modified optimization while + xsaveopt supports it on xsaveopt enabled systems. + + noxsaves [X86] Disables xsaves and xrstors used in saving and + restoring x86 extended register state in compacted + form of xsave area. The kernel will fall back to use + xsaveopt and xrstor to save and restore the states + in standard form of xsave area. By using this + parameter, xsave area per process might occupy more + memory on xsaves enabled systems. + eagerfpu= [X86] on enable eager fpu restore off disable eager fpu restore diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 0a3f9c9f98d5ccf446980cfcf71b7055bb79ade8..473bdbee378a10ac2030b586dc33d3be74de5a11 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -161,6 +161,20 @@ static inline int alternatives_text_reserved(void *start, void *end) asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) \ : : "i" (0), ## input) +/* + * This is similar to alternative_input. But it has two features and + * respective instructions. + * + * If CPU has feature2, newinstr2 is used. + * Otherwise, if CPU has feature1, newinstr1 is used. + * Otherwise, oldinstr is used. + */ +#define alternative_input_2(oldinstr, newinstr1, feature1, newinstr2, \ + feature2, input...) \ + asm volatile(ALTERNATIVE_2(oldinstr, newinstr1, feature1, \ + newinstr2, feature2) \ + : : "i" (0), ## input) + /* Like alternative_input, but with a single output argument */ #define alternative_io(oldinstr, newinstr, feature, output, input...) \ asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) \ diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h index e3b85422cf127d38806a903ba6c2d0e6f664bf2f..412ececa00b957014a535e0a36d15ae108a88ffa 100644 --- a/arch/x86/include/asm/fpu-internal.h +++ b/arch/x86/include/asm/fpu-internal.h @@ -508,9 +508,12 @@ static inline void user_fpu_begin(void) static inline void __save_fpu(struct task_struct *tsk) { - if (use_xsave()) - xsave_state(&tsk->thread.fpu.state->xsave, -1); - else + if (use_xsave()) { + if (unlikely(system_state == SYSTEM_BOOTING)) + xsave_state_booting(&tsk->thread.fpu.state->xsave, -1); + else + xsave_state(&tsk->thread.fpu.state->xsave, -1); + } else fpu_fxsave(&tsk->thread.fpu); } diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index ee30b9f0b91c9d36f04bb4b13200a5d675e8a873..eb71ec794732b98f09531054c37ecc19c5d94f52 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -385,8 +385,8 @@ struct bndcsr_struct { struct xsave_hdr_struct { u64 xstate_bv; - u64 reserved1[2]; - u64 reserved2[5]; + u64 xcomp_bv; + u64 reserved[6]; } __attribute__((packed)); struct xsave_struct { diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h index d949ef28c48bd9c423c5c668a1b0ce61ed70cac5..7e7a79ada6584fa4161e285f1f32181a29baa70e 100644 --- a/arch/x86/include/asm/xsave.h +++ b/arch/x86/include/asm/xsave.h @@ -52,24 +52,170 @@ extern void xsave_init(void); extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask); extern int init_fpu(struct task_struct *child); -static inline int fpu_xrstor_checking(struct xsave_struct *fx) +/* These macros all use (%edi)/(%rdi) as the single memory argument. */ +#define XSAVE ".byte " REX_PREFIX "0x0f,0xae,0x27" +#define XSAVEOPT ".byte " REX_PREFIX "0x0f,0xae,0x37" +#define XSAVES ".byte " REX_PREFIX "0x0f,0xc7,0x2f" +#define XRSTOR ".byte " REX_PREFIX "0x0f,0xae,0x2f" +#define XRSTORS ".byte " REX_PREFIX "0x0f,0xc7,0x1f" + +#define xstate_fault ".section .fixup,\"ax\"\n" \ + "3: movl $-1,%[err]\n" \ + " jmp 2b\n" \ + ".previous\n" \ + _ASM_EXTABLE(1b, 3b) \ + : [err] "=r" (err) + +/* + * This function is called only during boot time when x86 caps are not set + * up and alternative can not be used yet. + */ +static inline int xsave_state_booting(struct xsave_struct *fx, u64 mask) { - int err; + u32 lmask = mask; + u32 hmask = mask >> 32; + int err = 0; + + WARN_ON(system_state != SYSTEM_BOOTING); + + if (boot_cpu_has(X86_FEATURE_XSAVES)) + asm volatile("1:"XSAVES"\n\t" + "2:\n\t" + : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) + : "memory"); + else + asm volatile("1:"XSAVE"\n\t" + "2:\n\t" + : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) + : "memory"); + + asm volatile(xstate_fault + : "0" (0) + : "memory"); + + return err; +} + +/* + * This function is called only during boot time when x86 caps are not set + * up and alternative can not be used yet. + */ +static inline int xrstor_state_booting(struct xsave_struct *fx, u64 mask) +{ + u32 lmask = mask; + u32 hmask = mask >> 32; + int err = 0; + + WARN_ON(system_state != SYSTEM_BOOTING); - asm volatile("1: .byte " REX_PREFIX "0x0f,0xae,0x2f\n\t" - "2:\n" - ".section .fixup,\"ax\"\n" - "3: movl $-1,%[err]\n" - " jmp 2b\n" - ".previous\n" - _ASM_EXTABLE(1b, 3b) - : [err] "=r" (err) - : "D" (fx), "m" (*fx), "a" (-1), "d" (-1), "0" (0) + if (boot_cpu_has(X86_FEATURE_XSAVES)) + asm volatile("1:"XRSTORS"\n\t" + "2:\n\t" + : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) + : "memory"); + else + asm volatile("1:"XRSTOR"\n\t" + "2:\n\t" + : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) + : "memory"); + + asm volatile(xstate_fault + : "0" (0) + : "memory"); + + return err; +} + +/* + * Save processor xstate to xsave area. + */ +static inline int xsave_state(struct xsave_struct *fx, u64 mask) +{ + u32 lmask = mask; + u32 hmask = mask >> 32; + int err = 0; + + /* + * If xsaves is enabled, xsaves replaces xsaveopt because + * it supports compact format and supervisor states in addition to + * modified optimization in xsaveopt. + * + * Otherwise, if xsaveopt is enabled, xsaveopt replaces xsave + * because xsaveopt supports modified optimization which is not + * supported by xsave. + * + * If none of xsaves and xsaveopt is enabled, use xsave. + */ + alternative_input_2( + "1:"XSAVE, + "1:"XSAVEOPT, + X86_FEATURE_XSAVEOPT, + "1:"XSAVES, + X86_FEATURE_XSAVES, + [fx] "D" (fx), "a" (lmask), "d" (hmask) : + "memory"); + asm volatile("2:\n\t" + xstate_fault + : "0" (0) : "memory"); return err; } +/* + * Restore processor xstate from xsave area. + */ +static inline int xrstor_state(struct xsave_struct *fx, u64 mask) +{ + int err = 0; + u32 lmask = mask; + u32 hmask = mask >> 32; + + /* + * Use xrstors to restore context if it is enabled. xrstors supports + * compacted format of xsave area which is not supported by xrstor. + */ + alternative_input( + "1: " XRSTOR, + "1: " XRSTORS, + X86_FEATURE_XSAVES, + "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) + : "memory"); + + asm volatile("2:\n" + xstate_fault + : "0" (0) + : "memory"); + + return err; +} + +/* + * Save xstate context for old process during context switch. + */ +static inline void fpu_xsave(struct fpu *fpu) +{ + xsave_state(&fpu->state->xsave, -1); +} + +/* + * Restore xstate context for new process during context switch. + */ +static inline int fpu_xrstor_checking(struct xsave_struct *fx) +{ + return xrstor_state(fx, -1); +} + +/* + * Save xstate to user space xsave area. + * + * We don't use modified optimization because xrstor/xrstors might track + * a different application. + * + * We don't use compacted format xsave area for + * backward compatibility for old applications which don't understand + * compacted format of xsave area. + */ static inline int xsave_user(struct xsave_struct __user *buf) { int err; @@ -83,69 +229,34 @@ static inline int xsave_user(struct xsave_struct __user *buf) return -EFAULT; __asm__ __volatile__(ASM_STAC "\n" - "1: .byte " REX_PREFIX "0x0f,0xae,0x27\n" + "1:"XSAVE"\n" "2: " ASM_CLAC "\n" - ".section .fixup,\"ax\"\n" - "3: movl $-1,%[err]\n" - " jmp 2b\n" - ".previous\n" - _ASM_EXTABLE(1b,3b) - : [err] "=r" (err) + xstate_fault : "D" (buf), "a" (-1), "d" (-1), "0" (0) : "memory"); return err; } +/* + * Restore xstate from user space xsave area. + */ static inline int xrestore_user(struct xsave_struct __user *buf, u64 mask) { - int err; + int err = 0; struct xsave_struct *xstate = ((__force struct xsave_struct *)buf); u32 lmask = mask; u32 hmask = mask >> 32; __asm__ __volatile__(ASM_STAC "\n" - "1: .byte " REX_PREFIX "0x0f,0xae,0x2f\n" + "1:"XRSTOR"\n" "2: " ASM_CLAC "\n" - ".section .fixup,\"ax\"\n" - "3: movl $-1,%[err]\n" - " jmp 2b\n" - ".previous\n" - _ASM_EXTABLE(1b,3b) - : [err] "=r" (err) + xstate_fault : "D" (xstate), "a" (lmask), "d" (hmask), "0" (0) : "memory"); /* memory required? */ return err; } -static inline void xrstor_state(struct xsave_struct *fx, u64 mask) -{ - u32 lmask = mask; - u32 hmask = mask >> 32; - - asm volatile(".byte " REX_PREFIX "0x0f,0xae,0x2f\n\t" - : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) - : "memory"); -} - -static inline void xsave_state(struct xsave_struct *fx, u64 mask) -{ - u32 lmask = mask; - u32 hmask = mask >> 32; +void *get_xsave_addr(struct xsave_struct *xsave, int xstate); +void setup_xstate_comp(void); - asm volatile(".byte " REX_PREFIX "0x0f,0xae,0x27\n\t" - : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) - : "memory"); -} - -static inline void fpu_xsave(struct fpu *fpu) -{ - /* This, however, we can work around by forcing the compiler to select - an addressing mode that doesn't require extended registers. */ - alternative_input( - ".byte " REX_PREFIX "0x0f,0xae,0x27", - ".byte " REX_PREFIX "0x0f,0xae,0x37", - X86_FEATURE_XSAVEOPT, - [fx] "D" (&fpu->state->xsave), "a" (-1), "d" (-1) : - "memory"); -} #endif diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 333fd5209336d3df51e8ed676e4aa4b937a48b6b..e4ab2b42bd6f469528c73cac4ac0fb186b5bdbd3 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -148,6 +148,7 @@ static int __init x86_xsave_setup(char *s) { setup_clear_cpu_cap(X86_FEATURE_XSAVE); setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); + setup_clear_cpu_cap(X86_FEATURE_XSAVES); setup_clear_cpu_cap(X86_FEATURE_AVX); setup_clear_cpu_cap(X86_FEATURE_AVX2); return 1; @@ -161,6 +162,13 @@ static int __init x86_xsaveopt_setup(char *s) } __setup("noxsaveopt", x86_xsaveopt_setup); +static int __init x86_xsaves_setup(char *s) +{ + setup_clear_cpu_cap(X86_FEATURE_XSAVES); + return 1; +} +__setup("noxsaves", x86_xsaves_setup); + #ifdef CONFIG_X86_32 static int cachesize_override = -1; static int disable_x86_serial_nr = 1; diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index d5dd808144190ffd1d443229b4dbbd56740fface..a9a4229f6161b25b6e8a5752be7943f478525d21 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -375,7 +375,7 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset, /* * These bits must be zero. */ - xsave_hdr->reserved1[0] = xsave_hdr->reserved1[1] = 0; + memset(xsave_hdr->reserved, 0, 48); return ret; } diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 4505e2a950d81f479663df20ce787dbad5f293b0..f804dc935d2adffb37f587d34de9c4cd10ce70a3 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -93,6 +93,7 @@ void arch_task_cache_init(void) kmem_cache_create("task_xstate", xstate_size, __alignof__(union thread_xstate), SLAB_PANIC | SLAB_NOTRACK, NULL); + setup_xstate_comp(); } /* diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index a4b451c6addfb7085a22c70b408614a58bf0ad7b..940b142cc11f8390ebdc15d1121cd67cc24c94aa 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -8,6 +8,7 @@ #include #include +#include #include #include #include @@ -24,7 +25,9 @@ u64 pcntxt_mask; struct xsave_struct *init_xstate_buf; static struct _fpx_sw_bytes fx_sw_reserved, fx_sw_reserved_ia32; -static unsigned int *xstate_offsets, *xstate_sizes, xstate_features; +static unsigned int *xstate_offsets, *xstate_sizes; +static unsigned int xstate_comp_offsets[sizeof(pcntxt_mask)*8]; +static unsigned int xstate_features; /* * If a processor implementation discern that a processor state component is @@ -283,7 +286,7 @@ sanitize_restored_xstate(struct task_struct *tsk, if (use_xsave()) { /* These bits must be zero. */ - xsave_hdr->reserved1[0] = xsave_hdr->reserved1[1] = 0; + memset(xsave_hdr->reserved, 0, 48); /* * Init the state that is not present in the memory @@ -478,6 +481,52 @@ static void __init setup_xstate_features(void) } while (1); } +/* + * This function sets up offsets and sizes of all extended states in + * xsave area. This supports both standard format and compacted format + * of the xsave aread. + * + * Input: void + * Output: void + */ +void setup_xstate_comp(void) +{ + unsigned int xstate_comp_sizes[sizeof(pcntxt_mask)*8]; + int i; + + /* + * The FP xstates and SSE xstates are legacy states. They are always + * in the fixed offsets in the xsave area in either compacted form + * or standard form. + */ + xstate_comp_offsets[0] = 0; + xstate_comp_offsets[1] = offsetof(struct i387_fxsave_struct, xmm_space); + + if (!cpu_has_xsaves) { + for (i = 2; i < xstate_features; i++) { + if (test_bit(i, (unsigned long *)&pcntxt_mask)) { + xstate_comp_offsets[i] = xstate_offsets[i]; + xstate_comp_sizes[i] = xstate_sizes[i]; + } + } + return; + } + + xstate_comp_offsets[2] = FXSAVE_SIZE + XSAVE_HDR_SIZE; + + for (i = 2; i < xstate_features; i++) { + if (test_bit(i, (unsigned long *)&pcntxt_mask)) + xstate_comp_sizes[i] = xstate_sizes[i]; + else + xstate_comp_sizes[i] = 0; + + if (i > 2) + xstate_comp_offsets[i] = xstate_comp_offsets[i-1] + + xstate_comp_sizes[i-1]; + + } +} + /* * setup the xstate image representing the init state */ @@ -496,15 +545,21 @@ static void __init setup_init_fpu_buf(void) setup_xstate_features(); + if (cpu_has_xsaves) { + init_xstate_buf->xsave_hdr.xcomp_bv = + (u64)1 << 63 | pcntxt_mask; + init_xstate_buf->xsave_hdr.xstate_bv = pcntxt_mask; + } + /* * Init all the features state with header_bv being 0x0 */ - xrstor_state(init_xstate_buf, -1); + xrstor_state_booting(init_xstate_buf, -1); /* * Dump the init state again. This is to identify the init state * of any feature which is not represented by all zero's. */ - xsave_state(init_xstate_buf, -1); + xsave_state_booting(init_xstate_buf, -1); } static enum { AUTO, ENABLE, DISABLE } eagerfpu = AUTO; @@ -520,6 +575,30 @@ static int __init eager_fpu_setup(char *s) } __setup("eagerfpu=", eager_fpu_setup); + +/* + * Calculate total size of enabled xstates in XCR0/pcntxt_mask. + */ +static void __init init_xstate_size(void) +{ + unsigned int eax, ebx, ecx, edx; + int i; + + if (!cpu_has_xsaves) { + cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx); + xstate_size = ebx; + return; + } + + xstate_size = FXSAVE_SIZE + XSAVE_HDR_SIZE; + for (i = 2; i < 64; i++) { + if (test_bit(i, (unsigned long *)&pcntxt_mask)) { + cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx); + xstate_size += eax; + } + } +} + /* * Enable and initialize the xsave feature. */ @@ -551,8 +630,7 @@ static void __init xstate_enable_boot_cpu(void) /* * Recompute the context size for enabled features */ - cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx); - xstate_size = ebx; + init_xstate_size(); update_regset_xstate_info(xstate_size, pcntxt_mask); prepare_fx_sw_frame(); @@ -572,8 +650,9 @@ static void __init xstate_enable_boot_cpu(void) } } - pr_info("enabled xstate_bv 0x%llx, cntxt size 0x%x\n", - pcntxt_mask, xstate_size); + pr_info("enabled xstate_bv 0x%llx, cntxt size 0x%x using %s\n", + pcntxt_mask, xstate_size, + cpu_has_xsaves ? "compacted form" : "standard form"); } /* @@ -635,3 +714,26 @@ void eager_fpu_init(void) else fxrstor_checking(&init_xstate_buf->i387); } + +/* + * Given the xsave area and a state inside, this function returns the + * address of the state. + * + * This is the API that is called to get xstate address in either + * standard format or compacted format of xsave area. + * + * Inputs: + * xsave: base address of the xsave area; + * xstate: state which is defined in xsave.h (e.g. XSTATE_FP, XSTATE_SSE, + * etc.) + * Output: + * address of the state in the xsave area. + */ +void *get_xsave_addr(struct xsave_struct *xsave, int xstate) +{ + int feature = fls64(xstate) - 1; + if (!test_bit(feature, (unsigned long *)&pcntxt_mask)) + return NULL; + + return (void *)xsave + xstate_comp_offsets[feature]; +}