提交 c1299e7f 编写于 作者: G gangliao 提交者: GitHub

Merge pull request #804 from gangliao/check_avx

Add inline and bit manipulation in CpuId.h
......@@ -14,6 +14,8 @@ limitations under the License. */
#ifdef _WIN32
#include <intrin.h>
/// for MSVC
#define CPUID(info, x) __cpuidex(info, x, 0)
......@@ -31,25 +33,27 @@ namespace paddle {
/// Queries the processor through the CPUID macro and records every detected
/// SIMD extension as a bit in simd_flags_.
///
/// Each statement parses as
/// `simd_flags_ |= ((reg & bit) ? FLAG : SIMD_NONE)`, so a flag is OR-ed in
/// only when its feature bit is set. Some tests appear more than once;
/// OR-ing the same flag again leaves simd_flags_ unchanged.
SIMDFlags::SIMDFlags() {
// Receives the four values produced by one CPUID query. Only indices 1, 2
// and 3 are read below (presumably EBX, ECX and EDX -- confirm against the
// CPUID macro definition).
unsigned int cpuInfo[4];
// CPUID: https://en.wikipedia.org/wiki/CPUID
// clang-format off
// Query 0x00000001: SSE family, FMA3 and AVX bits.
CPUID(cpuInfo, 0x00000001);
simd_flags_ |= cpuInfo[3] & (1 << 25) ? SIMD_SSE : SIMD_NONE;
simd_flags_ |= cpuInfo[3] & (1 << 26) ? SIMD_SSE2 : SIMD_NONE;
simd_flags_ |= cpuInfo[2] & (1 << 0) ? SIMD_SSE3 : SIMD_NONE;
simd_flags_ |= cpuInfo[2] & (1 << 9) ? SIMD_SSSE3 : SIMD_NONE;
simd_flags_ |= cpuInfo[3] & (1 << 25) ? SIMD_SSE : SIMD_NONE;
simd_flags_ |= cpuInfo[3] & (1 << 26) ? SIMD_SSE2 : SIMD_NONE;
simd_flags_ |= cpuInfo[2] & (1 << 0) ? SIMD_SSE3 : SIMD_NONE;
simd_flags_ |= cpuInfo[2] & (1 << 9) ? SIMD_SSSE3 : SIMD_NONE;
simd_flags_ |= cpuInfo[2] & (1 << 19) ? SIMD_SSE41 : SIMD_NONE;
simd_flags_ |= cpuInfo[2] & (1 << 20) ? SIMD_SSE42 : SIMD_NONE;
simd_flags_ |= cpuInfo[2] & (1 << 12) ? SIMD_FMA3 : SIMD_NONE;
simd_flags_ |= cpuInfo[2] & (1 << 28) ? SIMD_AVX : SIMD_NONE;
simd_flags_ |= cpuInfo[2] & (1 << 12) ? SIMD_FMA3 : SIMD_NONE;
simd_flags_ |= cpuInfo[2] & (1 << 28) ? SIMD_AVX : SIMD_NONE;
// Query 0x00000007: AVX2 and AVX512 bits.
CPUID(cpuInfo, 0x00000007);
simd_flags_ |= cpuInfo[1] & (1 << 5) ? SIMD_AVX2 : SIMD_NONE;
simd_flags_ |= cpuInfo[1] & (1 << 16) ? SIMD_AVX512 : SIMD_NONE;
simd_flags_ |= cpuInfo[1] & (1 << 5) ? SIMD_AVX2 : SIMD_NONE;
simd_flags_ |= cpuInfo[1] & (1 << 16) ? SIMD_AVX512: SIMD_NONE;
// Query 0x80000001: FMA4 bit.
CPUID(cpuInfo, 0x80000001);
simd_flags_ |= cpuInfo[2] & (1 << 16) ? SIMD_FMA4 : SIMD_NONE;
simd_flags_ |= cpuInfo[2] & (1 << 16) ? SIMD_FMA4 : SIMD_NONE;
// clang-format on
}
/// Returns the address of a function-local static SIMDFlags object, which is
/// constructed the first time this function is called.
SIMDFlags* SIMDFlags::instance() {
SIMDFlags const* SIMDFlags::instance() {
static SIMDFlags instance;
return &instance;
}
......
......@@ -11,61 +11,90 @@ limitations under the License. */
#pragma once
#include <iostream>
#include "DisableCopy.h"
namespace paddle {
// clang-format off
/// Bit masks naming individual SIMD extensions. Every enumerator other than
/// SIMD_NONE occupies its own bit, so values can be OR-ed into one mask.
enum simd_t {
  SIMD_NONE   = 0x000,  ///< No extension
  SIMD_SSE    = 0x001,  ///< SSE
  SIMD_SSE2   = 0x002,  ///< SSE 2
  SIMD_SSE3   = 0x004,  ///< SSE 3
  SIMD_SSSE3  = 0x008,  ///< SSSE 3
  SIMD_SSE41  = 0x010,  ///< SSE 4.1
  SIMD_SSE42  = 0x020,  ///< SSE 4.2
  SIMD_FMA3   = 0x040,  ///< FMA 3
  SIMD_FMA4   = 0x080,  ///< FMA 4
  SIMD_AVX    = 0x100,  ///< AVX
  SIMD_AVX2   = 0x200,  ///< AVX 2
  SIMD_AVX512 = 0x400,  ///< AVX 512
};
// clang-format on
/// Holds the set of SIMD extensions detected by the constructor and exposes
/// read-only queries over that set.
class SIMDFlags final {
public:
// Presumably removes the copy operations (defined in DisableCopy.h) --
// confirm against that header.
DISABLE_COPY(SIMDFlags);
/// Fills simd_flags_ from CPUID queries (defined in the .cpp file).
SIMDFlags();
/// Accessor for the shared instance.
static SIMDFlags* instance();
static SIMDFlags const* instance();
// Single-flag queries: each returns true when the matching bit is set in
// simd_flags_.
inline bool isSSE() const { return simd_flags_ & SIMD_SSE; }
inline bool isSSE2() const { return simd_flags_ & SIMD_SSE2; }
inline bool isSSE3() const { return simd_flags_ & SIMD_SSE3; }
inline bool isSSSE3() const { return simd_flags_ & SIMD_SSSE3; }
inline bool isSSE41() const { return simd_flags_ & SIMD_SSE41; }
inline bool isSSE42() const { return simd_flags_ & SIMD_SSE42; }
inline bool isFMA3() const { return simd_flags_ & SIMD_FMA3; }
inline bool isFMA4() const { return simd_flags_ & SIMD_FMA4; }
inline bool isAVX() const { return simd_flags_ & SIMD_AVX; }
inline bool isAVX2() const { return simd_flags_ & SIMD_AVX2; }
inline bool isAVX512() const { return simd_flags_ & SIMD_AVX512; }
/// Returns true only when every bit set in `flags` is also set in
/// simd_flags_; a `flags` value of 0 therefore yields true.
inline bool check(int flags) const {
// (simd_flags_ & flags) keeps the requested bits that are present; XOR-ing
// with flags leaves exactly the requested bits that are missing.
return !((simd_flags_ & flags) ^ flags);
}
private:
/// Bit masks naming individual SIMD extensions, one bit per extension.
enum simd_t {
SIMD_NONE = 0, ///< None
SIMD_SSE = 1 << 0, ///< SSE
SIMD_SSE2 = 1 << 1, ///< SSE 2
SIMD_SSE3 = 1 << 2, ///< SSE 3
SIMD_SSSE3 = 1 << 3, ///< SSSE 3
SIMD_SSE41 = 1 << 4, ///< SSE 4.1
SIMD_SSE42 = 1 << 5, ///< SSE 4.2
SIMD_FMA3 = 1 << 6, ///< FMA 3
SIMD_FMA4 = 1 << 7, ///< FMA 4
SIMD_AVX = 1 << 8, ///< AVX
SIMD_AVX2 = 1 << 9, ///< AVX 2
SIMD_AVX512 = 1 << 10, ///< AVX 512
};
/// Bitwise OR of the simd_t values detected by the constructor.
int simd_flags_ = SIMD_NONE;
};
/// Shorthand predicates: each macro expands to the matching is*() accessor
/// called on the pointer returned by SIMDFlags::instance().
#define HAS_SSE SIMDFlags::instance()->isSSE()
#define HAS_SSE2 SIMDFlags::instance()->isSSE2()
#define HAS_SSE3 SIMDFlags::instance()->isSSE3()
#define HAS_SSSE3 SIMDFlags::instance()->isSSSE3()
#define HAS_SSE41 SIMDFlags::instance()->isSSE41()
#define HAS_SSE42 SIMDFlags::instance()->isSSE42()
#define HAS_FMA3 SIMDFlags::instance()->isFMA3()
#define HAS_FMA4 SIMDFlags::instance()->isFMA4()
#define HAS_AVX SIMDFlags::instance()->isAVX()
#define HAS_AVX2 SIMDFlags::instance()->isAVX2()
#define HAS_AVX512 SIMDFlags::instance()->isAVX512()
/**
 * @brief Runtime test for a combination of SIMD flags.
 *
 * Expands to SIMDFlags::instance()->check(flags), which is true only when
 * every flag in the mask is present.
 *
 * Usage:
 * @code{.cpp}
 *
 * if (HAS_SIMD(SIMD_AVX2 | SIMD_FMA4)) {
 *   avx2_fma4_stub();
 * } else if (HAS_SIMD(SIMD_AVX)) {
 *   avx_stub();
 * }
 *
 * @endcode
 */
#define HAS_SIMD(flags) SIMDFlags::instance()->check(flags)
/**
 * @brief Check individual SIMD flags at runtime.
 *
 * Each HAS_* macro below expands to HAS_SIMD with a single simd_t flag, so
 * the macros can be combined with ordinary boolean operators.
 *
 * 1. Require all of several SIMD flags:
 *
 * @code{.cpp}
 * if (HAS_AVX && HAS_AVX2) {
 *   avx2_stub();
 * }
 * @endcode
 *
 * 2. Require at least one of several SIMD flags:
 *
 * @code{.cpp}
 * if (HAS_SSE41 || HAS_SSE42) {
 *   sse4_stub();
 * }
 * @endcode
 */
// clang-format off
#define HAS_SSE HAS_SIMD(SIMD_SSE)
#define HAS_SSE2 HAS_SIMD(SIMD_SSE2)
#define HAS_SSE3 HAS_SIMD(SIMD_SSE3)
#define HAS_SSSE3 HAS_SIMD(SIMD_SSSE3)
#define HAS_SSE41 HAS_SIMD(SIMD_SSE41)
#define HAS_SSE42 HAS_SIMD(SIMD_SSE42)
#define HAS_FMA3 HAS_SIMD(SIMD_FMA3)
#define HAS_FMA4 HAS_SIMD(SIMD_FMA4)
#define HAS_AVX HAS_SIMD(SIMD_AVX)
#define HAS_AVX2 HAS_SIMD(SIMD_AVX2)
#define HAS_AVX512 HAS_SIMD(SIMD_AVX512)
// clang-format on
} // namespace paddle
......@@ -19,33 +19,33 @@ using namespace paddle; // NOLINT
/// Cross-checks the HAS_* macros against the compiler's
/// __builtin_cpu_supports for the same features.
TEST(SIMDFlags, gccTest) {
// The body is compiled only when __GNUC__ or __GNUG__ is defined and
// __clang__ is not.
#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__))
// `!x != y` first collapses the builtin's int result to an inverted bool,
// so each CHECK passes exactly when the builtin and the macro agree.
CHECK(!__builtin_cpu_supports("sse") != HAS_SSE);
CHECK(!__builtin_cpu_supports("sse2") != HAS_SSE2);
CHECK(!__builtin_cpu_supports("sse3") != HAS_SSE3);
CHECK(!__builtin_cpu_supports("ssse3") != HAS_SSSE3);
// clang-format off
CHECK(!__builtin_cpu_supports("sse") != HAS_SSE);
CHECK(!__builtin_cpu_supports("sse2") != HAS_SSE2);
CHECK(!__builtin_cpu_supports("sse3") != HAS_SSE3);
CHECK(!__builtin_cpu_supports("ssse3") != HAS_SSSE3);
CHECK(!__builtin_cpu_supports("sse4.1") != HAS_SSE41);
CHECK(!__builtin_cpu_supports("sse4.2") != HAS_SSE42);
CHECK(!__builtin_cpu_supports("avx") != HAS_AVX);
CHECK(!__builtin_cpu_supports("avx2") != HAS_AVX2);
CHECK(!__builtin_cpu_supports("avx") != HAS_AVX);
CHECK(!__builtin_cpu_supports("avx2") != HAS_AVX2);
// clang-format on
#endif
}
/// Logs which SIMD extensions the running CPU reports. The first group of
/// lines calls the accessors on the shared SIMDFlags instance directly; the
/// second group goes through the HAS_* macros. Nothing is asserted.
TEST(SIMDFlags, normalPrint) {
  auto simd = SIMDFlags::instance();
  LOG(INFO) << "Has SSE2: " << std::boolalpha << simd->isSSE2();
  LOG(INFO) << "Has SSE3: " << std::boolalpha << simd->isSSE3();
  LOG(INFO) << "Has SSSE3: " << std::boolalpha << simd->isSSSE3();
  LOG(INFO) << "Has SSE4.1: " << std::boolalpha << simd->isSSE41();
  LOG(INFO) << "Has SSE4.2: " << std::boolalpha << simd->isSSE42();
  LOG(INFO) << "Has FMA3: " << std::boolalpha << simd->isFMA3();
  LOG(INFO) << "Has FMA4: " << std::boolalpha << simd->isFMA4();
  LOG(INFO) << "Has AVX: " << std::boolalpha << simd->isAVX();
  LOG(INFO) << "Has AVX2: " << std::boolalpha << simd->isAVX2();
  LOG(INFO) << "Has AVX512: " << std::boolalpha << simd->isAVX512();
  LOG(INFO) << "Has SSE: " << std::boolalpha << HAS_SSE;
  LOG(INFO) << "Has SSE2: " << std::boolalpha << HAS_SSE2;
  LOG(INFO) << "Has SSE3: " << std::boolalpha << HAS_SSE3;
  LOG(INFO) << "Has SSSE3: " << std::boolalpha << HAS_SSSE3;
  // `<<` binds tighter than `||`, so the disjunction has to be parenthesised.
  // Without the parentheses only HAS_SSE41 is streamed and `|| HAS_SSE42` is
  // applied to the stream expression, never reaching the log line.
  LOG(INFO) << "Has SSE4: " << std::boolalpha << (HAS_SSE41 || HAS_SSE42);
  LOG(INFO) << "Has FMA3: " << std::boolalpha << HAS_FMA3;
  LOG(INFO) << "Has FMA4: " << std::boolalpha << HAS_FMA4;
  LOG(INFO) << "Has AVX: " << std::boolalpha << HAS_AVX;
  LOG(INFO) << "Has AVX2: " << std::boolalpha << HAS_AVX2;
  LOG(INFO) << "Has AVX512: " << std::boolalpha << HAS_AVX512;
}
/// Test-binary entry point: hands the command line to googletest and then to
/// paddle::initMain, runs every registered test and returns the runner's
/// result as the process exit status.
int main(int argc, char** argv) {
  testing::InitGoogleTest(&argc, argv);
  paddle::initMain(argc, argv);
  const int exitStatus = RUN_ALL_TESTS();
  return exitStatus;
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册