// Copyright 2018 The Gemmlowp Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// detect_platform.h: Sets up macros that control architecture-specific
// features of gemmlowp's implementation.

#ifndef GEMMLOWP_INTERNAL_DETECT_PLATFORM_H_
#define GEMMLOWP_INTERNAL_DETECT_PLATFORM_H_

// Our inline assembly paths assume GCC/Clang syntax.
// Native Client doesn't seem to support inline assembly(?).
#if defined(__GNUC__) && !defined(__native_client__)
#define GEMMLOWP_ALLOW_INLINE_ASM
#endif

// Define a macro that prevents inlining for GCC.
// For non-GCC, define it as an empty macro.
#if defined(__GNUC__)
#define GEMMLOWP_NOINLINE __attribute__((noinline))
#else
#define GEMMLOWP_NOINLINE
#endif

// Detect ARM, 32-bit or 64-bit
#ifdef __arm__
#define GEMMLOWP_ARM_32
#endif

#ifdef __aarch64__
#define GEMMLOWP_ARM_64
#endif

#if defined(GEMMLOWP_ARM_32) || defined(GEMMLOWP_ARM_64)
#define GEMMLOWP_ARM
#endif

// Detect MIPS, 32-bit or 64-bit
#if defined(__mips) && !defined(__LP64__)
#define GEMMLOWP_MIPS_32
#endif

#if defined(__mips) && defined(__LP64__)
#define GEMMLOWP_MIPS_64
#endif

#if defined(GEMMLOWP_MIPS_32) || defined(GEMMLOWP_MIPS_64)
#define GEMMLOWP_MIPS
#endif

// Detect x86, 32-bit or 64-bit
#if defined(__i386__) || defined(_M_IX86) || defined(_X86_) || defined(__i386)
#define GEMMLOWP_X86_32
#endif

#if defined(__x86_64__) || defined(_M_X64) || defined(__amd64)
#define GEMMLOWP_X86_64
#endif

#if defined(GEMMLOWP_X86_32) || defined(GEMMLOWP_X86_64)
#define GEMMLOWP_X86
#endif

// Some of our optimized paths use inline assembly, and for now we don't
// bother enabling the other optimized paths, which use intrinsics, on
// platforms where the inline assembly paths are unavailable.
#ifdef GEMMLOWP_ALLOW_INLINE_ASM

// Detect NEON. It's important to check for both tokens.
#if (defined __ARM_NEON) || (defined __ARM_NEON__)
#define GEMMLOWP_NEON
#endif

// Convenience NEON tokens for 32-bit or 64-bit
#if defined(GEMMLOWP_NEON) && defined(GEMMLOWP_ARM_32)
#define GEMMLOWP_NEON_32
#endif

#if defined(GEMMLOWP_NEON) && defined(GEMMLOWP_ARM_64)
#define GEMMLOWP_NEON_64
#endif

// Detect MIPS MSA.
// Limit MSA optimizations to little-endian CPUs for now.
// TODO: Perhaps eventually support MSA optimizations on big-endian CPUs?
#if defined(GEMMLOWP_MIPS) && (__mips_isa_rev >= 5) && defined(__mips_msa) && \
    defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
#define GEMMLOWP_MSA
#endif

// Convenience MIPS MSA tokens for 32-bit or 64-bit.
#if defined(GEMMLOWP_MSA) && defined(GEMMLOWP_MIPS_32)
#define GEMMLOWP_MSA_32
#endif

#if defined(GEMMLOWP_MSA) && defined(GEMMLOWP_MIPS_64)
#define GEMMLOWP_MSA_64
#endif

// AVX2 paths additionally require the compiler define -DGEMMLOWP_ENABLE_AVX2.
// Detect AVX2.
#if defined(__AVX2__) && defined(GEMMLOWP_ENABLE_AVX2)
#define GEMMLOWP_AVX2
// Detect SSE4.
// MSVC does not have the __SSE4_1__ macro, but will enable SSE4
// when AVX is turned on.
#elif defined(__SSE4_1__) || (defined(_MSC_VER) && defined(__AVX__))
#define GEMMLOWP_SSE4
// Detect SSE3.
#elif defined(__SSE3__)
#define GEMMLOWP_SSE3
#endif

// Convenience SSE4/SSE3/AVX2 tokens for 32-bit or 64-bit
#if defined(GEMMLOWP_SSE4) && defined(GEMMLOWP_X86_32) && \
    !defined(GEMMLOWP_DISABLE_SSE4)
#define GEMMLOWP_SSE4_32
#endif

#if defined(GEMMLOWP_SSE3) && defined(GEMMLOWP_X86_32)
#define GEMMLOWP_SSE3_32
#endif

#if defined(GEMMLOWP_SSE4) && defined(GEMMLOWP_X86_64) && \
    !defined(GEMMLOWP_DISABLE_SSE4)
#define GEMMLOWP_SSE4_64
#endif

#if defined(GEMMLOWP_SSE3) && defined(GEMMLOWP_X86_64)
#define GEMMLOWP_SSE3_64
#endif

#if defined(GEMMLOWP_AVX2) && defined(GEMMLOWP_X86_64)
#define GEMMLOWP_AVX2_64
#endif

#if defined(__has_feature)
#if __has_feature(memory_sanitizer)
#include <sanitizer/msan_interface.h>
#define GEMMLOWP_MARK_MEMORY_AS_INITIALIZED __msan_unpoison
#elif __has_feature(address_sanitizer)
#include <sanitizer/asan_interface.h>
#define GEMMLOWP_MARK_MEMORY_AS_INITIALIZED __asan_unpoison_memory_region
#endif
#endif

#endif  // GEMMLOWP_ALLOW_INLINE_ASM

// Detect Android. Don't conflate with ARM - we care about tuning
// for non-ARM Android devices too. This can be used in conjunction
// with x86 to tune differently for mobile x86 CPUs (Atom) vs. desktop x86
// CPUs.
#if defined(__ANDROID__) || defined(ANDROID)
#define GEMMLOWP_ANDROID
#endif

#endif  // GEMMLOWP_INTERNAL_DETECT_PLATFORM_H_
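
// Illustrative usage sketch: one way client code might dispatch on the
// convenience tokens defined above. The kernel function names here are
// hypothetical placeholders (not real gemmlowp symbols), so the sketch is
// kept as a comment rather than live code.
//
//   #include "detect_platform.h"
//
//   void RunBestGemmKernel() {
//   #if defined(GEMMLOWP_NEON_64)
//     RunNeon64Kernel();     // hypothetical 64-bit NEON path
//   #elif defined(GEMMLOWP_AVX2_64)
//     RunAvx264Kernel();     // hypothetical AVX2 path
//   #elif defined(GEMMLOWP_SSE4_64)
//     RunSse464Kernel();     // hypothetical SSE4 path
//   #else
//     RunReferenceKernel();  // hypothetical portable fallback
//   #endif
//   }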