NOTE(review): line breaks and column alignment were reconstructed from a
whitespace-collapsed copy of this patch. The header name on the removed
`#include` line in CpuId.h was lost in that copy; restore it before applying.

diff --git a/paddle/utils/CpuId.cpp b/paddle/utils/CpuId.cpp
index 53db82e48a54ba629edde0c74cdadc7bb83b33d2..8eefdd2980e7f56a836df6fd2ff8c31b81a55555 100644
--- a/paddle/utils/CpuId.cpp
+++ b/paddle/utils/CpuId.cpp
@@ -14,6 +14,8 @@ limitations under the License. */
 
 #ifdef _WIN32
 
+#include <intrin.h>
+
 /// for MSVC
 #define CPUID(info, x) __cpuidex(info, x, 0)
 
@@ -31,25 +33,27 @@ namespace paddle {
 SIMDFlags::SIMDFlags() {
   unsigned int cpuInfo[4];
   // CPUID: https://en.wikipedia.org/wiki/CPUID
+  // clang-format off
   CPUID(cpuInfo, 0x00000001);
-  simd_flags_ |= cpuInfo[3] & (1 << 25) ? SIMD_SSE : SIMD_NONE;
-  simd_flags_ |= cpuInfo[3] & (1 << 26) ? SIMD_SSE2 : SIMD_NONE;
-  simd_flags_ |= cpuInfo[2] & (1 << 0) ? SIMD_SSE3 : SIMD_NONE;
-  simd_flags_ |= cpuInfo[2] & (1 << 9) ? SIMD_SSSE3 : SIMD_NONE;
+  simd_flags_ |= cpuInfo[3] & (1 << 25) ? SIMD_SSE   : SIMD_NONE;
+  simd_flags_ |= cpuInfo[3] & (1 << 26) ? SIMD_SSE2  : SIMD_NONE;
+  simd_flags_ |= cpuInfo[2] & (1 <<  0) ? SIMD_SSE3  : SIMD_NONE;
+  simd_flags_ |= cpuInfo[2] & (1 <<  9) ? SIMD_SSSE3 : SIMD_NONE;
   simd_flags_ |= cpuInfo[2] & (1 << 19) ? SIMD_SSE41 : SIMD_NONE;
   simd_flags_ |= cpuInfo[2] & (1 << 20) ? SIMD_SSE42 : SIMD_NONE;
-  simd_flags_ |= cpuInfo[2] & (1 << 12) ? SIMD_FMA3 : SIMD_NONE;
-  simd_flags_ |= cpuInfo[2] & (1 << 28) ? SIMD_AVX : SIMD_NONE;
+  simd_flags_ |= cpuInfo[2] & (1 << 12) ? SIMD_FMA3  : SIMD_NONE;
+  simd_flags_ |= cpuInfo[2] & (1 << 28) ? SIMD_AVX   : SIMD_NONE;
 
   CPUID(cpuInfo, 0x00000007);
-  simd_flags_ |= cpuInfo[1] & (1 << 5) ? SIMD_AVX2 : SIMD_NONE;
-  simd_flags_ |= cpuInfo[1] & (1 << 16) ? SIMD_AVX512 : SIMD_NONE;
+  simd_flags_ |= cpuInfo[1] & (1 <<  5) ? SIMD_AVX2  : SIMD_NONE;
+  simd_flags_ |= cpuInfo[1] & (1 << 16) ? SIMD_AVX512: SIMD_NONE;
 
   CPUID(cpuInfo, 0x80000001);
-  simd_flags_ |= cpuInfo[2] & (1 << 16) ? SIMD_FMA4 : SIMD_NONE;
+  simd_flags_ |= cpuInfo[2] & (1 << 16) ? SIMD_FMA4  : SIMD_NONE;
+  // clang-format on
 }
 
-SIMDFlags* SIMDFlags::instance() {
+SIMDFlags const* SIMDFlags::instance() {
   static SIMDFlags instance;
   return &instance;
 }
diff --git a/paddle/utils/CpuId.h b/paddle/utils/CpuId.h
index 66ac59cf3e50a0e70b073f5fc67e88f54924f90a..7a354da75851ed7cca4e85e77714624634951f00 100644
--- a/paddle/utils/CpuId.h
+++ b/paddle/utils/CpuId.h
@@ -11,61 +11,95 @@ limitations under the License. */
 
 #pragma once
 
-#include
 #include "DisableCopy.h"
 
 namespace paddle {
 
+// clang-format off
+enum simd_t {
+  SIMD_NONE   = 0,        ///< None
+  SIMD_SSE    = 1 << 0,   ///< SSE
+  SIMD_SSE2   = 1 << 1,   ///< SSE 2
+  SIMD_SSE3   = 1 << 2,   ///< SSE 3
+  SIMD_SSSE3  = 1 << 3,   ///< SSSE 3
+  SIMD_SSE41  = 1 << 4,   ///< SSE 4.1
+  SIMD_SSE42  = 1 << 5,   ///< SSE 4.2
+  SIMD_FMA3   = 1 << 6,   ///< FMA 3
+  SIMD_FMA4   = 1 << 7,   ///< FMA 4
+  SIMD_AVX    = 1 << 8,   ///< AVX
+  SIMD_AVX2   = 1 << 9,   ///< AVX 2
+  SIMD_AVX512 = 1 << 10,  ///< AVX 512
+};
+// clang-format on
+
 class SIMDFlags final {
 public:
   DISABLE_COPY(SIMDFlags);
 
   SIMDFlags();
 
-  static SIMDFlags* instance();
+  static SIMDFlags const* instance();
 
-  inline bool isSSE() const { return simd_flags_ & SIMD_SSE; }
-  inline bool isSSE2() const { return simd_flags_ & SIMD_SSE2; }
-  inline bool isSSE3() const { return simd_flags_ & SIMD_SSE3; }
-  inline bool isSSSE3() const { return simd_flags_ & SIMD_SSSE3; }
-  inline bool isSSE41() const { return simd_flags_ & SIMD_SSE41; }
-  inline bool isSSE42() const { return simd_flags_ & SIMD_SSE42; }
-  inline bool isFMA3() const { return simd_flags_ & SIMD_FMA3; }
-  inline bool isFMA4() const { return simd_flags_ & SIMD_FMA4; }
-  inline bool isAVX() const { return simd_flags_ & SIMD_AVX; }
-  inline bool isAVX2() const { return simd_flags_ & SIMD_AVX2; }
-  inline bool isAVX512() const { return simd_flags_ & SIMD_AVX512; }
+  /**
+   * @brief Check whether every flag in @p flags is set in simd_flags_.
+   * @param flags Bitwise OR of simd_t values.
+   * @return true only if all requested bits are set.
+   */
+  inline bool check(int flags) const {
+    return !((simd_flags_ & flags) ^ flags);
+  }
 
 private:
-  enum simd_t {
-    SIMD_NONE = 0,          ///< None
-    SIMD_SSE = 1 << 0,      ///< SSE
-    SIMD_SSE2 = 1 << 1,     ///< SSE 2
-    SIMD_SSE3 = 1 << 2,     ///< SSE 3
-    SIMD_SSSE3 = 1 << 3,    ///< SSSE 3
-    SIMD_SSE41 = 1 << 4,    ///< SSE 4.1
-    SIMD_SSE42 = 1 << 5,    ///< SSE 4.2
-    SIMD_FMA3 = 1 << 6,     ///< FMA 3
-    SIMD_FMA4 = 1 << 7,     ///< FMA 4
-    SIMD_AVX = 1 << 8,      ///< AVX
-    SIMD_AVX2 = 1 << 9,     ///< AVX 2
-    SIMD_AVX512 = 1 << 10,  ///< AVX 512
-  };
-
-  /// simd flags
   int simd_flags_ = SIMD_NONE;
 };
 
-#define HAS_SSE SIMDFlags::instance()->isSSE()
-#define HAS_SSE2 SIMDFlags::instance()->isSSE2()
-#define HAS_SSE3 SIMDFlags::instance()->isSSE3()
-#define HAS_SSSE3 SIMDFlags::instance()->isSSSE3()
-#define HAS_SSE41 SIMDFlags::instance()->isSSE41()
-#define HAS_SSE42 SIMDFlags::instance()->isSSE42()
-#define HAS_FMA3 SIMDFlags::instance()->isFMA3()
-#define HAS_FMA4 SIMDFlags::instance()->isFMA4()
-#define HAS_AVX SIMDFlags::instance()->isAVX()
-#define HAS_AVX2 SIMDFlags::instance()->isAVX2()
-#define HAS_AVX512 SIMDFlags::instance()->isAVX512()
+/**
+ * @brief Check SIMD flags at runtime.
+ *
+ * For example.
+ * @code{.cpp}
+ *
+ * if (HAS_SIMD(SIMD_AVX2 | SIMD_FMA4)) {
+ *   avx2_fma4_stub();
+ * } else if (HAS_SIMD(SIMD_AVX)) {
+ *   avx_stub();
+ * }
+ *
+ * @endcode
+ */
+#define HAS_SIMD(__flags) SIMDFlags::instance()->check(__flags)
+
+/**
+ * @brief Check SIMD flags at runtime.
+ *
+ * 1. Check all SIMD flags at runtime:
+ *
+ * @code{.cpp}
+ * if (HAS_AVX && HAS_AVX2) {
+ *   avx2_stub();
+ * }
+ * @endcode
+ *
+ * 2. Check one SIMD flag at runtime:
+ *
+ * @code{.cpp}
+ * if (HAS_SSE41 || HAS_SSE42) {
+ *   sse4_stub();
+ * }
+ * @endcode
+ */
+// clang-format off
+#define HAS_SSE     HAS_SIMD(SIMD_SSE)
+#define HAS_SSE2    HAS_SIMD(SIMD_SSE2)
+#define HAS_SSE3    HAS_SIMD(SIMD_SSE3)
+#define HAS_SSSE3   HAS_SIMD(SIMD_SSSE3)
+#define HAS_SSE41   HAS_SIMD(SIMD_SSE41)
+#define HAS_SSE42   HAS_SIMD(SIMD_SSE42)
+#define HAS_FMA3    HAS_SIMD(SIMD_FMA3)
+#define HAS_FMA4    HAS_SIMD(SIMD_FMA4)
+#define HAS_AVX     HAS_SIMD(SIMD_AVX)
+#define HAS_AVX2    HAS_SIMD(SIMD_AVX2)
+#define HAS_AVX512  HAS_SIMD(SIMD_AVX512)
+// clang-format on
 
 }  // namespace paddle
diff --git a/paddle/utils/tests/test_SIMDFlags.cpp b/paddle/utils/tests/test_SIMDFlags.cpp
index 41532953a719684f6dd6fd78a05ada0b22085f99..42edede209ad957c13c1cec8e6bb20bd0fe9d28b 100644
--- a/paddle/utils/tests/test_SIMDFlags.cpp
+++ b/paddle/utils/tests/test_SIMDFlags.cpp
@@ -19,33 +19,33 @@ using namespace paddle;  // NOLINT
 
 TEST(SIMDFlags, gccTest) {
 #if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__))
-  CHECK(!__builtin_cpu_supports("sse") != HAS_SSE);
-  CHECK(!__builtin_cpu_supports("sse2") != HAS_SSE2);
-  CHECK(!__builtin_cpu_supports("sse3") != HAS_SSE3);
-  CHECK(!__builtin_cpu_supports("ssse3") != HAS_SSSE3);
+  // clang-format off
+  CHECK(!__builtin_cpu_supports("sse")    != HAS_SSE);
+  CHECK(!__builtin_cpu_supports("sse2")   != HAS_SSE2);
+  CHECK(!__builtin_cpu_supports("sse3")   != HAS_SSE3);
+  CHECK(!__builtin_cpu_supports("ssse3")  != HAS_SSSE3);
   CHECK(!__builtin_cpu_supports("sse4.1") != HAS_SSE41);
   CHECK(!__builtin_cpu_supports("sse4.2") != HAS_SSE42);
-  CHECK(!__builtin_cpu_supports("avx") != HAS_AVX);
-  CHECK(!__builtin_cpu_supports("avx2") != HAS_AVX2);
+  CHECK(!__builtin_cpu_supports("avx")    != HAS_AVX);
+  CHECK(!__builtin_cpu_supports("avx2")   != HAS_AVX2);
+// clang-format on
 #endif
 }
 
 TEST(SIMDFlags, normalPrint) {
-  auto simd = SIMDFlags::instance();
-  LOG(INFO) << "Has SSE2: " << std::boolalpha << simd->isSSE2();
-  LOG(INFO) << "Has SSE3: " << std::boolalpha << simd->isSSE3();
-  LOG(INFO) << "Has SSSE3: " << std::boolalpha << simd->isSSSE3();
-  LOG(INFO) << "Has SSE4.1: " << std::boolalpha << simd->isSSE41();
-  LOG(INFO) << "Has SSE4.2: " << std::boolalpha << simd->isSSE42();
-  LOG(INFO) << "Has FMA3: " << std::boolalpha << simd->isFMA3();
-  LOG(INFO) << "Has FMA4: " << std::boolalpha << simd->isFMA4();
-  LOG(INFO) << "Has AVX: " << std::boolalpha << simd->isAVX();
-  LOG(INFO) << "Has AVX2: " << std::boolalpha << simd->isAVX2();
-  LOG(INFO) << "Has AVX512: " << std::boolalpha << simd->isAVX512();
+  LOG(INFO) << "Has SSE: " << std::boolalpha << HAS_SSE;
+  LOG(INFO) << "Has SSE2: " << std::boolalpha << HAS_SSE2;
+  LOG(INFO) << "Has SSE3: " << std::boolalpha << HAS_SSE3;
+  LOG(INFO) << "Has SSSE3: " << std::boolalpha << HAS_SSSE3;
+  LOG(INFO) << "Has SSE4: " << std::boolalpha << (HAS_SSE41 || HAS_SSE42);
+  LOG(INFO) << "Has FMA3: " << std::boolalpha << HAS_FMA3;
+  LOG(INFO) << "Has FMA4: " << std::boolalpha << HAS_FMA4;
+  LOG(INFO) << "Has AVX: " << std::boolalpha << HAS_AVX;
+  LOG(INFO) << "Has AVX2: " << std::boolalpha << HAS_AVX2;
+  LOG(INFO) << "Has AVX512: " << std::boolalpha << HAS_AVX512;
 }
 
 int main(int argc, char** argv) {
   testing::InitGoogleTest(&argc, argv);
-  paddle::initMain(argc, argv);
   return RUN_ALL_TESTS();
 }