提交 c1299e7f 编写于 作者: G gangliao 提交者: GitHub

Merge pull request #804 from gangliao/check_avx

Add inline and bit manipulation in CpuId.h
...@@ -14,6 +14,8 @@ limitations under the License. */ ...@@ -14,6 +14,8 @@ limitations under the License. */
#ifdef _WIN32 #ifdef _WIN32
#include <intrin.h>
/// for MSVC /// for MSVC
#define CPUID(info, x) __cpuidex(info, x, 0) #define CPUID(info, x) __cpuidex(info, x, 0)
...@@ -31,25 +33,27 @@ namespace paddle { ...@@ -31,25 +33,27 @@ namespace paddle {
SIMDFlags::SIMDFlags() { SIMDFlags::SIMDFlags() {
unsigned int cpuInfo[4]; unsigned int cpuInfo[4];
// CPUID: https://en.wikipedia.org/wiki/CPUID // CPUID: https://en.wikipedia.org/wiki/CPUID
// clang-format off
CPUID(cpuInfo, 0x00000001); CPUID(cpuInfo, 0x00000001);
simd_flags_ |= cpuInfo[3] & (1 << 25) ? SIMD_SSE : SIMD_NONE; simd_flags_ |= cpuInfo[3] & (1 << 25) ? SIMD_SSE : SIMD_NONE;
simd_flags_ |= cpuInfo[3] & (1 << 26) ? SIMD_SSE2 : SIMD_NONE; simd_flags_ |= cpuInfo[3] & (1 << 26) ? SIMD_SSE2 : SIMD_NONE;
simd_flags_ |= cpuInfo[2] & (1 << 0) ? SIMD_SSE3 : SIMD_NONE; simd_flags_ |= cpuInfo[2] & (1 << 0) ? SIMD_SSE3 : SIMD_NONE;
simd_flags_ |= cpuInfo[2] & (1 << 9) ? SIMD_SSSE3 : SIMD_NONE; simd_flags_ |= cpuInfo[2] & (1 << 9) ? SIMD_SSSE3 : SIMD_NONE;
simd_flags_ |= cpuInfo[2] & (1 << 19) ? SIMD_SSE41 : SIMD_NONE; simd_flags_ |= cpuInfo[2] & (1 << 19) ? SIMD_SSE41 : SIMD_NONE;
simd_flags_ |= cpuInfo[2] & (1 << 20) ? SIMD_SSE42 : SIMD_NONE; simd_flags_ |= cpuInfo[2] & (1 << 20) ? SIMD_SSE42 : SIMD_NONE;
simd_flags_ |= cpuInfo[2] & (1 << 12) ? SIMD_FMA3 : SIMD_NONE; simd_flags_ |= cpuInfo[2] & (1 << 12) ? SIMD_FMA3 : SIMD_NONE;
simd_flags_ |= cpuInfo[2] & (1 << 28) ? SIMD_AVX : SIMD_NONE; simd_flags_ |= cpuInfo[2] & (1 << 28) ? SIMD_AVX : SIMD_NONE;
CPUID(cpuInfo, 0x00000007); CPUID(cpuInfo, 0x00000007);
simd_flags_ |= cpuInfo[1] & (1 << 5) ? SIMD_AVX2 : SIMD_NONE; simd_flags_ |= cpuInfo[1] & (1 << 5) ? SIMD_AVX2 : SIMD_NONE;
simd_flags_ |= cpuInfo[1] & (1 << 16) ? SIMD_AVX512 : SIMD_NONE; simd_flags_ |= cpuInfo[1] & (1 << 16) ? SIMD_AVX512: SIMD_NONE;
CPUID(cpuInfo, 0x80000001); CPUID(cpuInfo, 0x80000001);
simd_flags_ |= cpuInfo[2] & (1 << 16) ? SIMD_FMA4 : SIMD_NONE; simd_flags_ |= cpuInfo[2] & (1 << 16) ? SIMD_FMA4 : SIMD_NONE;
// clang-format on
} }
SIMDFlags* SIMDFlags::instance() { SIMDFlags const* SIMDFlags::instance() {
static SIMDFlags instance; static SIMDFlags instance;
return &instance; return &instance;
} }
......
...@@ -11,61 +11,90 @@ limitations under the License. */ ...@@ -11,61 +11,90 @@ limitations under the License. */
#pragma once #pragma once
#include <iostream>
#include "DisableCopy.h" #include "DisableCopy.h"
namespace paddle { namespace paddle {
// clang-format off
enum simd_t {
SIMD_NONE = 0, ///< None
SIMD_SSE = 1 << 0, ///< SSE
SIMD_SSE2 = 1 << 1, ///< SSE 2
SIMD_SSE3 = 1 << 2, ///< SSE 3
SIMD_SSSE3 = 1 << 3, ///< SSSE 3
SIMD_SSE41 = 1 << 4, ///< SSE 4.1
SIMD_SSE42 = 1 << 5, ///< SSE 4.2
SIMD_FMA3 = 1 << 6, ///< FMA 3
SIMD_FMA4 = 1 << 7, ///< FMA 4
SIMD_AVX = 1 << 8, ///< AVX
SIMD_AVX2 = 1 << 9, ///< AVX 2
SIMD_AVX512 = 1 << 10, ///< AVX 512
};
// clang-format on
class SIMDFlags final { class SIMDFlags final {
public: public:
DISABLE_COPY(SIMDFlags); DISABLE_COPY(SIMDFlags);
SIMDFlags(); SIMDFlags();
static SIMDFlags* instance(); static SIMDFlags const* instance();
inline bool isSSE() const { return simd_flags_ & SIMD_SSE; } inline bool check(int flags) const {
inline bool isSSE2() const { return simd_flags_ & SIMD_SSE2; } return !((simd_flags_ & flags) ^ flags);
inline bool isSSE3() const { return simd_flags_ & SIMD_SSE3; } }
inline bool isSSSE3() const { return simd_flags_ & SIMD_SSSE3; }
inline bool isSSE41() const { return simd_flags_ & SIMD_SSE41; }
inline bool isSSE42() const { return simd_flags_ & SIMD_SSE42; }
inline bool isFMA3() const { return simd_flags_ & SIMD_FMA3; }
inline bool isFMA4() const { return simd_flags_ & SIMD_FMA4; }
inline bool isAVX() const { return simd_flags_ & SIMD_AVX; }
inline bool isAVX2() const { return simd_flags_ & SIMD_AVX2; }
inline bool isAVX512() const { return simd_flags_ & SIMD_AVX512; }
private: private:
enum simd_t {
SIMD_NONE = 0, ///< None
SIMD_SSE = 1 << 0, ///< SSE
SIMD_SSE2 = 1 << 1, ///< SSE 2
SIMD_SSE3 = 1 << 2, ///< SSE 3
SIMD_SSSE3 = 1 << 3, ///< SSSE 3
SIMD_SSE41 = 1 << 4, ///< SSE 4.1
SIMD_SSE42 = 1 << 5, ///< SSE 4.2
SIMD_FMA3 = 1 << 6, ///< FMA 3
SIMD_FMA4 = 1 << 7, ///< FMA 4
SIMD_AVX = 1 << 8, ///< AVX
SIMD_AVX2 = 1 << 9, ///< AVX 2
SIMD_AVX512 = 1 << 10, ///< AVX 512
};
/// simd flags
int simd_flags_ = SIMD_NONE; int simd_flags_ = SIMD_NONE;
}; };
#define HAS_SSE SIMDFlags::instance()->isSSE() /**
#define HAS_SSE2 SIMDFlags::instance()->isSSE2() * @brief Check SIMD flags at runtime.
#define HAS_SSE3 SIMDFlags::instance()->isSSE3() *
#define HAS_SSSE3 SIMDFlags::instance()->isSSSE3() * For example.
#define HAS_SSE41 SIMDFlags::instance()->isSSE41() * @code{.cpp}
#define HAS_SSE42 SIMDFlags::instance()->isSSE42() *
#define HAS_FMA3 SIMDFlags::instance()->isFMA3() * if (HAS_SIMD(SIMD_AVX2 | SIMD_FMA4)) {
#define HAS_FMA4 SIMDFlags::instance()->isFMA4() * avx2_fma4_stub();
#define HAS_AVX SIMDFlags::instance()->isAVX() * } else if (HAS_SIMD(SIMD_AVX)) {
#define HAS_AVX2 SIMDFlags::instance()->isAVX2() * avx_stub();
#define HAS_AVX512 SIMDFlags::instance()->isAVX512() * }
*
* @endcode
*/
#define HAS_SIMD(__flags) SIMDFlags::instance()->check(__flags)
/**
* @brief Check SIMD flags at runtime.
*
* 1. Check all SIMD flags at runtime:
*
* @code{.cpp}
* if (HAS_AVX && HAS_AVX2) {
* avx2_stub();
* }
* @endcode
*
* 2. Check one SIMD flag at runtime:
*
* @code{.cpp}
* if (HAS_SSE41 || HAS_SSE42) {
* sse4_stub();
* }
* @endcode
*/
// clang-format off
#define HAS_SSE HAS_SIMD(SIMD_SSE)
#define HAS_SSE2 HAS_SIMD(SIMD_SSE2)
#define HAS_SSE3 HAS_SIMD(SIMD_SSE3)
#define HAS_SSSE3 HAS_SIMD(SIMD_SSSE3)
#define HAS_SSE41 HAS_SIMD(SIMD_SSE41)
#define HAS_SSE42 HAS_SIMD(SIMD_SSE42)
#define HAS_FMA3 HAS_SIMD(SIMD_FMA3)
#define HAS_FMA4 HAS_SIMD(SIMD_FMA4)
#define HAS_AVX HAS_SIMD(SIMD_AVX)
#define HAS_AVX2 HAS_SIMD(SIMD_AVX2)
#define HAS_AVX512 HAS_SIMD(SIMD_AVX512)
// clang-format on
} // namespace paddle } // namespace paddle
...@@ -19,33 +19,33 @@ using namespace paddle; // NOLINT ...@@ -19,33 +19,33 @@ using namespace paddle; // NOLINT
TEST(SIMDFlags, gccTest) { TEST(SIMDFlags, gccTest) {
#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)) #if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__))
CHECK(!__builtin_cpu_supports("sse") != HAS_SSE); // clang-format off
CHECK(!__builtin_cpu_supports("sse2") != HAS_SSE2); CHECK(!__builtin_cpu_supports("sse") != HAS_SSE);
CHECK(!__builtin_cpu_supports("sse3") != HAS_SSE3); CHECK(!__builtin_cpu_supports("sse2") != HAS_SSE2);
CHECK(!__builtin_cpu_supports("ssse3") != HAS_SSSE3); CHECK(!__builtin_cpu_supports("sse3") != HAS_SSE3);
CHECK(!__builtin_cpu_supports("ssse3") != HAS_SSSE3);
CHECK(!__builtin_cpu_supports("sse4.1") != HAS_SSE41); CHECK(!__builtin_cpu_supports("sse4.1") != HAS_SSE41);
CHECK(!__builtin_cpu_supports("sse4.2") != HAS_SSE42); CHECK(!__builtin_cpu_supports("sse4.2") != HAS_SSE42);
CHECK(!__builtin_cpu_supports("avx") != HAS_AVX); CHECK(!__builtin_cpu_supports("avx") != HAS_AVX);
CHECK(!__builtin_cpu_supports("avx2") != HAS_AVX2); CHECK(!__builtin_cpu_supports("avx2") != HAS_AVX2);
// clang-format on
#endif #endif
} }
TEST(SIMDFlags, normalPrint) { TEST(SIMDFlags, normalPrint) {
auto simd = SIMDFlags::instance(); LOG(INFO) << "Has SSE: " << std::boolalpha << HAS_SSE;
LOG(INFO) << "Has SSE2: " << std::boolalpha << simd->isSSE2(); LOG(INFO) << "Has SSE2: " << std::boolalpha << HAS_SSE2;
LOG(INFO) << "Has SSE3: " << std::boolalpha << simd->isSSE3(); LOG(INFO) << "Has SSE3: " << std::boolalpha << HAS_SSE3;
LOG(INFO) << "Has SSSE3: " << std::boolalpha << simd->isSSSE3(); LOG(INFO) << "Has SSSE3: " << std::boolalpha << HAS_SSSE3;
LOG(INFO) << "Has SSE4.1: " << std::boolalpha << simd->isSSE41(); LOG(INFO) << "Has SSE4: " << std::boolalpha << HAS_SSE41 || HAS_SSE42;
LOG(INFO) << "Has SSE4.2: " << std::boolalpha << simd->isSSE42(); LOG(INFO) << "Has FMA3: " << std::boolalpha << HAS_FMA3;
LOG(INFO) << "Has FMA3: " << std::boolalpha << simd->isFMA3(); LOG(INFO) << "Has FMA4: " << std::boolalpha << HAS_FMA4;
LOG(INFO) << "Has FMA4: " << std::boolalpha << simd->isFMA4(); LOG(INFO) << "Has AVX: " << std::boolalpha << HAS_AVX;
LOG(INFO) << "Has AVX: " << std::boolalpha << simd->isAVX(); LOG(INFO) << "Has AVX2: " << std::boolalpha << HAS_AVX2;
LOG(INFO) << "Has AVX2: " << std::boolalpha << simd->isAVX2(); LOG(INFO) << "Has AVX512: " << std::boolalpha << HAS_AVX512;
LOG(INFO) << "Has AVX512: " << std::boolalpha << simd->isAVX512();
} }
int main(int argc, char** argv) { int main(int argc, char** argv) {
testing::InitGoogleTest(&argc, argv); testing::InitGoogleTest(&argc, argv);
paddle::initMain(argc, argv);
return RUN_ALL_TESTS(); return RUN_ALL_TESTS();
} }
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册