提交 c1299e7f 编写于 作者: G gangliao 提交者: GitHub

Merge pull request #804 from gangliao/check_avx

Add inline and bit manipulation in CpuId.h
...@@ -14,6 +14,8 @@ limitations under the License. */ ...@@ -14,6 +14,8 @@ limitations under the License. */
#ifdef _WIN32 #ifdef _WIN32
#include <intrin.h>
/// for MSVC /// for MSVC
#define CPUID(info, x) __cpuidex(info, x, 0) #define CPUID(info, x) __cpuidex(info, x, 0)
...@@ -31,6 +33,7 @@ namespace paddle { ...@@ -31,6 +33,7 @@ namespace paddle {
SIMDFlags::SIMDFlags() { SIMDFlags::SIMDFlags() {
unsigned int cpuInfo[4]; unsigned int cpuInfo[4];
// CPUID: https://en.wikipedia.org/wiki/CPUID // CPUID: https://en.wikipedia.org/wiki/CPUID
// clang-format off
CPUID(cpuInfo, 0x00000001); CPUID(cpuInfo, 0x00000001);
simd_flags_ |= cpuInfo[3] & (1 << 25) ? SIMD_SSE : SIMD_NONE; simd_flags_ |= cpuInfo[3] & (1 << 25) ? SIMD_SSE : SIMD_NONE;
simd_flags_ |= cpuInfo[3] & (1 << 26) ? SIMD_SSE2 : SIMD_NONE; simd_flags_ |= cpuInfo[3] & (1 << 26) ? SIMD_SSE2 : SIMD_NONE;
...@@ -43,13 +46,14 @@ SIMDFlags::SIMDFlags() { ...@@ -43,13 +46,14 @@ SIMDFlags::SIMDFlags() {
CPUID(cpuInfo, 0x00000007); CPUID(cpuInfo, 0x00000007);
simd_flags_ |= cpuInfo[1] & (1 << 5) ? SIMD_AVX2 : SIMD_NONE; simd_flags_ |= cpuInfo[1] & (1 << 5) ? SIMD_AVX2 : SIMD_NONE;
simd_flags_ |= cpuInfo[1] & (1 << 16) ? SIMD_AVX512 : SIMD_NONE; simd_flags_ |= cpuInfo[1] & (1 << 16) ? SIMD_AVX512: SIMD_NONE;
CPUID(cpuInfo, 0x80000001); CPUID(cpuInfo, 0x80000001);
simd_flags_ |= cpuInfo[2] & (1 << 16) ? SIMD_FMA4 : SIMD_NONE; simd_flags_ |= cpuInfo[2] & (1 << 16) ? SIMD_FMA4 : SIMD_NONE;
// clang-format on
} }
// Accessor for the shared SIMDFlags object. Both diff columns return the address
// of a function-local static; the right-hand column additionally const-qualifies
// the pointee in the return type.
SIMDFlags* SIMDFlags::instance() { SIMDFlags const* SIMDFlags::instance() {
static SIMDFlags instance; static SIMDFlags instance;
return &instance; return &instance;
} }
......
...@@ -11,33 +11,12 @@ limitations under the License. */ ...@@ -11,33 +11,12 @@ limitations under the License. */
#pragma once #pragma once
#include <iostream>
#include "DisableCopy.h" #include "DisableCopy.h"
namespace paddle { namespace paddle {
class SIMDFlags final { // clang-format off
public: enum simd_t {
DISABLE_COPY(SIMDFlags);
SIMDFlags();
static SIMDFlags* instance();
inline bool isSSE() const { return simd_flags_ & SIMD_SSE; }
inline bool isSSE2() const { return simd_flags_ & SIMD_SSE2; }
inline bool isSSE3() const { return simd_flags_ & SIMD_SSE3; }
inline bool isSSSE3() const { return simd_flags_ & SIMD_SSSE3; }
inline bool isSSE41() const { return simd_flags_ & SIMD_SSE41; }
inline bool isSSE42() const { return simd_flags_ & SIMD_SSE42; }
inline bool isFMA3() const { return simd_flags_ & SIMD_FMA3; }
inline bool isFMA4() const { return simd_flags_ & SIMD_FMA4; }
inline bool isAVX() const { return simd_flags_ & SIMD_AVX; }
inline bool isAVX2() const { return simd_flags_ & SIMD_AVX2; }
inline bool isAVX512() const { return simd_flags_ & SIMD_AVX512; }
private:
enum simd_t {
SIMD_NONE = 0, ///< None SIMD_NONE = 0, ///< None
SIMD_SSE = 1 << 0, ///< SSE SIMD_SSE = 1 << 0, ///< SSE
SIMD_SSE2 = 1 << 1, ///< SSE 2 SIMD_SSE2 = 1 << 1, ///< SSE 2
...@@ -50,22 +29,72 @@ private: ...@@ -50,22 +29,72 @@ private:
SIMD_AVX = 1 << 8, ///< AVX SIMD_AVX = 1 << 8, ///< AVX
SIMD_AVX2 = 1 << 9, ///< AVX 2 SIMD_AVX2 = 1 << 9, ///< AVX 2
SIMD_AVX512 = 1 << 10, ///< AVX 512 SIMD_AVX512 = 1 << 10, ///< AVX 512
}; };
// clang-format on
// Class that keeps the SIMD capability bits in simd_flags_ and answers
// bitmask queries against them through check().
/// simd flags class SIMDFlags final {
public:
DISABLE_COPY(SIMDFlags);
SIMDFlags();
// Returns a pointer-to-const SIMDFlags; the definition lives outside this class body.
static SIMDFlags const* instance();
// True only when every bit set in `flags` is also set in simd_flags_:
// masking with `flags` and then XOR-ing with `flags` yields zero exactly in that case.
inline bool check(int flags) const {
return !((simd_flags_ & flags) ^ flags);
}
private:
// Capability bitmask; default-initialized to SIMD_NONE.
int simd_flags_ = SIMD_NONE; int simd_flags_ = SIMD_NONE;
}; };
#define HAS_SSE SIMDFlags::instance()->isSSE() /**
#define HAS_SSE2 SIMDFlags::instance()->isSSE2() * @brief Check SIMD flags at runtime.
#define HAS_SSE3 SIMDFlags::instance()->isSSE3() *
#define HAS_SSSE3 SIMDFlags::instance()->isSSSE3() * For example.
#define HAS_SSE41 SIMDFlags::instance()->isSSE41() * @code{.cpp}
#define HAS_SSE42 SIMDFlags::instance()->isSSE42() *
#define HAS_FMA3 SIMDFlags::instance()->isFMA3() * if (HAS_SIMD(SIMD_AVX2 | SIMD_FMA4)) {
#define HAS_FMA4 SIMDFlags::instance()->isFMA4() * avx2_fm4_stub();
#define HAS_AVX SIMDFlags::instance()->isAVX() * } else if (HAS_SIMD(SIMD_AVX)) {
#define HAS_AVX2 SIMDFlags::instance()->isAVX2() * avx_stub();
#define HAS_AVX512 SIMDFlags::instance()->isAVX512() * }
*
* @endcode
*/
#define HAS_SIMD(__flags) SIMDFlags::instance()->check(__flags)
/**
* @brief Check SIMD flags at runtime.
*
* 1. Check that all of several SIMD flags are set at runtime:
*
* @code{.cpp}
* if (HAS_AVX && HAS_AVX2) {
* avx2_stub();
* }
* @endcode
*
* 2. Check that at least one of several SIMD flags is set at runtime:
*
* @code{.cpp}
* if (HAS_SSE41 || HAS_SSE42) {
* sse4_stub();
* }
* @endcode
*/
// clang-format off
#define HAS_SSE HAS_SIMD(SIMD_SSE)
#define HAS_SSE2 HAS_SIMD(SIMD_SSE2)
#define HAS_SSE3 HAS_SIMD(SIMD_SSE3)
#define HAS_SSSE3 HAS_SIMD(SIMD_SSSE3)
#define HAS_SSE41 HAS_SIMD(SIMD_SSE41)
#define HAS_SSE42 HAS_SIMD(SIMD_SSE42)
#define HAS_FMA3 HAS_SIMD(SIMD_FMA3)
#define HAS_FMA4 HAS_SIMD(SIMD_FMA4)
#define HAS_AVX HAS_SIMD(SIMD_AVX)
#define HAS_AVX2 HAS_SIMD(SIMD_AVX2)
#define HAS_AVX512 HAS_SIMD(SIMD_AVX512)
// clang-format on
} // namespace paddle } // namespace paddle
...@@ -19,6 +19,7 @@ using namespace paddle; // NOLINT ...@@ -19,6 +19,7 @@ using namespace paddle; // NOLINT
TEST(SIMDFlags, gccTest) { TEST(SIMDFlags, gccTest) {
#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)) #if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__))
// clang-format off
CHECK(!__builtin_cpu_supports("sse") != HAS_SSE); CHECK(!__builtin_cpu_supports("sse") != HAS_SSE);
CHECK(!__builtin_cpu_supports("sse2") != HAS_SSE2); CHECK(!__builtin_cpu_supports("sse2") != HAS_SSE2);
CHECK(!__builtin_cpu_supports("sse3") != HAS_SSE3); CHECK(!__builtin_cpu_supports("sse3") != HAS_SSE3);
...@@ -27,25 +28,24 @@ TEST(SIMDFlags, gccTest) { ...@@ -27,25 +28,24 @@ TEST(SIMDFlags, gccTest) {
CHECK(!__builtin_cpu_supports("sse4.2") != HAS_SSE42); CHECK(!__builtin_cpu_supports("sse4.2") != HAS_SSE42);
CHECK(!__builtin_cpu_supports("avx") != HAS_AVX); CHECK(!__builtin_cpu_supports("avx") != HAS_AVX);
CHECK(!__builtin_cpu_supports("avx2") != HAS_AVX2); CHECK(!__builtin_cpu_supports("avx2") != HAS_AVX2);
// clang-format on
#endif #endif
} }
// Logs the detected SIMD capabilities, one LOG(INFO) line per feature.
// In this side-by-side view the left-hand statements call simd->isXXX() and
// the right-hand statements use the HAS_* macros.
TEST(SIMDFlags, normalPrint) { TEST(SIMDFlags, normalPrint) {
auto simd = SIMDFlags::instance(); LOG(INFO) << "Has SSE: " << std::boolalpha << HAS_SSE;
LOG(INFO) << "Has SSE2: " << std::boolalpha << simd->isSSE2(); LOG(INFO) << "Has SSE2: " << std::boolalpha << HAS_SSE2;
LOG(INFO) << "Has SSE3: " << std::boolalpha << simd->isSSE3(); LOG(INFO) << "Has SSE3: " << std::boolalpha << HAS_SSE3;
LOG(INFO) << "Has SSSE3: " << std::boolalpha << simd->isSSSE3(); LOG(INFO) << "Has SSSE3: " << std::boolalpha << HAS_SSSE3;
// The disjunction is parenthesized: `<<` binds tighter than `||`, so without
// the parentheses only HAS_SSE41 is streamed and HAS_SSE42 never reaches the log.
LOG(INFO) << "Has SSE4.1: " << std::boolalpha << simd->isSSE41(); LOG(INFO) << "Has SSE4: " << std::boolalpha << (HAS_SSE41 || HAS_SSE42);
LOG(INFO) << "Has SSE4.2: " << std::boolalpha << simd->isSSE42(); LOG(INFO) << "Has FMA3: " << std::boolalpha << HAS_FMA3;
LOG(INFO) << "Has FMA3: " << std::boolalpha << simd->isFMA3(); LOG(INFO) << "Has FMA4: " << std::boolalpha << HAS_FMA4;
LOG(INFO) << "Has FMA4: " << std::boolalpha << simd->isFMA4(); LOG(INFO) << "Has AVX: " << std::boolalpha << HAS_AVX;
LOG(INFO) << "Has AVX: " << std::boolalpha << simd->isAVX(); LOG(INFO) << "Has AVX2: " << std::boolalpha << HAS_AVX2;
LOG(INFO) << "Has AVX2: " << std::boolalpha << simd->isAVX2(); LOG(INFO) << "Has AVX512: " << std::boolalpha << HAS_AVX512;
LOG(INFO) << "Has AVX512: " << std::boolalpha << simd->isAVX512();
} }
// Test entry point: passes the command-line arguments to GoogleTest and
// returns the result of RUN_ALL_TESTS().
int main(int argc, char** argv) { int main(int argc, char** argv) {
testing::InitGoogleTest(&argc, argv); testing::InitGoogleTest(&argc, argv);
// NOTE(review): this call is present in only one column of the diff view —
// confirm against the commit whether it is being added or removed.
paddle::initMain(argc, argv);
return RUN_ALL_TESTS(); return RUN_ALL_TESTS();
} }
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册