提交 cf66bb93 编写于 作者: D David Woodhouse

byteorder: allow arch to opt to use GCC intrinsics for byteswapping

Since GCC 4.4, there have been __builtin_bswap32() and __builtin_bswap64()
intrinsics. A __builtin_bswap16() came a little later (4.6 for PowerPC,
4.8 for other platforms).

By using these instead of the inline assembler that most architectures
have in their __arch_swabXX() macros, we let the compiler see what's
actually happening. The resulting code should be at least as good, and
much *better* in the cases where it can be combined with a nearby load
or store, using a load-and-byteswap or store-and-byteswap instruction
(e.g. lwbrx/stwbrx on PowerPC, movbe on Atom).

When GCC is sufficiently recent *and* the architecture opts in to using
the intrinsics by setting CONFIG_ARCH_USE_BUILTIN_BSWAP, they will be
used in preference to the __arch_swabXX() macros. An architecture which
does not set ARCH_USE_BUILTIN_BSWAP will continue to use its own
hand-crafted macros.
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
Acked-by: H. Peter Anvin <hpa@linux.intel.com>
上级 27d7c2a0
...@@ -112,6 +112,25 @@ config HAVE_EFFICIENT_UNALIGNED_ACCESS ...@@ -112,6 +112,25 @@ config HAVE_EFFICIENT_UNALIGNED_ACCESS
See Documentation/unaligned-memory-access.txt for more See Documentation/unaligned-memory-access.txt for more
information on the topic of unaligned memory accesses. information on the topic of unaligned memory accesses.
config ARCH_USE_BUILTIN_BSWAP
bool
help
Modern versions of GCC (since 4.4) have builtin functions
for handling byte-swapping. Using these, instead of the old
inline assembler that the architecture code provides in the
__arch_swabXX() macros, allows the compiler to see what's
happening and offers more opportunity for optimisation. In
particular, the compiler will be able to combine the byteswap
with a nearby load or store and use load-and-swap or
store-and-swap instructions if the architecture has them. It
should almost *never* result in code which is worse than the
hand-coded assembler in <asm/swab.h>. But just in case it
does, the use of the builtins is optional.
Any architecture with load-and-swap or store-and-swap
instructions should set this. And it shouldn't hurt to set it
on architectures that don't have such instructions.
config HAVE_SYSCALL_WRAPPERS config HAVE_SYSCALL_WRAPPERS
bool bool
......
...@@ -63,3 +63,13 @@ ...@@ -63,3 +63,13 @@
#define __compiletime_warning(message) __attribute__((warning(message))) #define __compiletime_warning(message) __attribute__((warning(message)))
#define __compiletime_error(message) __attribute__((error(message))) #define __compiletime_error(message) __attribute__((error(message)))
#endif #endif
/*
 * Advertise the compiler's byte-swap builtins, but only when the
 * architecture has opted in via CONFIG_ARCH_USE_BUILTIN_BSWAP.
 * NOTE(review): only __GNUC_MINOR__ is tested, so this presumably sits in
 * a header that is specific to one GCC major version -- confirm.
 */
#ifdef CONFIG_ARCH_USE_BUILTIN_BSWAP
/* The 32-bit and 64-bit builtins are enabled together from minor version 4. */
#if __GNUC_MINOR__ >= 4
#define __HAVE_BUILTIN_BSWAP32__
#define __HAVE_BUILTIN_BSWAP64__
#endif
/* The 16-bit builtin needs minor version 8, or 6 when targeting PowerPC. */
#if __GNUC_MINOR__ >= 8 || (defined(__powerpc__) && __GNUC_MINOR__ >= 6)
#define __HAVE_BUILTIN_BSWAP16__
#endif
#endif
...@@ -29,3 +29,10 @@ ...@@ -29,3 +29,10 @@
#endif #endif
#define uninitialized_var(x) x #define uninitialized_var(x) x
#ifndef __HAVE_BUILTIN_BSWAP16__
/*
 * icc has this, but it's called _bswap16. Alias the GCC-style name to it
 * and set the feature macro so code testing __HAVE_BUILTIN_BSWAP16__ can
 * call __builtin_bswap16().
 * NOTE(review): this hunk is not conditional on
 * CONFIG_ARCH_USE_BUILTIN_BSWAP; confirm whether that is intended.
 */
#define __HAVE_BUILTIN_BSWAP16__
#define __builtin_bswap16 _bswap16
#endif
...@@ -45,7 +45,9 @@ ...@@ -45,7 +45,9 @@
static inline __attribute_const__ __u16 __fswab16(__u16 val) static inline __attribute_const__ __u16 __fswab16(__u16 val)
{ {
#ifdef __arch_swab16 #ifdef __HAVE_BUILTIN_BSWAP16__
return __builtin_bswap16(val);
#elif defined (__arch_swab16)
return __arch_swab16(val); return __arch_swab16(val);
#else #else
return ___constant_swab16(val); return ___constant_swab16(val);
...@@ -54,7 +56,9 @@ static inline __attribute_const__ __u16 __fswab16(__u16 val) ...@@ -54,7 +56,9 @@ static inline __attribute_const__ __u16 __fswab16(__u16 val)
static inline __attribute_const__ __u32 __fswab32(__u32 val) static inline __attribute_const__ __u32 __fswab32(__u32 val)
{ {
#ifdef __arch_swab32 #ifdef __HAVE_BUILTIN_BSWAP32__
return __builtin_bswap32(val);
#elif defined(__arch_swab32)
return __arch_swab32(val); return __arch_swab32(val);
#else #else
return ___constant_swab32(val); return ___constant_swab32(val);
...@@ -63,7 +67,9 @@ static inline __attribute_const__ __u32 __fswab32(__u32 val) ...@@ -63,7 +67,9 @@ static inline __attribute_const__ __u32 __fswab32(__u32 val)
static inline __attribute_const__ __u64 __fswab64(__u64 val) static inline __attribute_const__ __u64 __fswab64(__u64 val)
{ {
#ifdef __arch_swab64 #ifdef __HAVE_BUILTIN_BSWAP64__
return __builtin_bswap64(val);
#elif defined (__arch_swab64)
return __arch_swab64(val); return __arch_swab64(val);
#elif defined(__SWAB_64_THRU_32__) #elif defined(__SWAB_64_THRU_32__)
__u32 h = val >> 32; __u32 h = val >> 32;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册