diff --git a/cmake/external/xbyak.cmake b/cmake/external/xbyak.cmake
index 7d493226821b2eff608d6cfaa0418cd319fe104e..610a692ef12c6ae6f992fff8e4e65f48f3aeb01f 100644
--- a/cmake/external/xbyak.cmake
+++ b/cmake/external/xbyak.cmake
@@ -44,7 +44,7 @@ ExternalProject_Add(
     DEPENDS ""
     PREFIX ${XBYAK_PREFIX_DIR}
     SOURCE_DIR ${XBYAK_SOURCE_DIR}
-    UPDATE_COMMAND ""
+    # UPDATE_COMMAND ""
     CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${XBYAK_INSTALL_ROOT}
     CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${XBYAK_INSTALL_ROOT}
 )
diff --git a/paddle/fluid/inference/api/analysis_config.cc b/paddle/fluid/inference/api/analysis_config.cc
index 167d083f3d47f75b003107d3f873c5f111a3b337..7eb1bb1a24e2441fe72fcbffd231e267688899eb 100644
--- a/paddle/fluid/inference/api/analysis_config.cc
+++ b/paddle/fluid/inference/api/analysis_config.cc
@@ -261,6 +261,10 @@ void AnalysisConfig::EnableMkldnnBfloat16() {
 #ifdef PADDLE_WITH_MKLDNN
   if (platform::MayIUse(platform::cpu_isa_t::avx512_core)) {
     use_mkldnn_bfloat16_ = true;
+    LOG(INFO) << "Hardware support for BFLOAT16"
+              << (platform::MayIUse(platform::cpu_isa_t::avx512_bf16)
+                      ? " is enabled"
+                      : " is disabled. Simulation will be used");
   } else {
     LOG(INFO) << "CPU does not support BFLOAT16 calculations";
     use_mkldnn_bfloat16_ = false;
diff --git a/paddle/fluid/platform/cpu_info.cc b/paddle/fluid/platform/cpu_info.cc
index b6d42f1c790649bc9105ff7004dbcc2a109ec8d0..923c97350e89ea9a3de01120bb7df57766247a38 100644
--- a/paddle/fluid/platform/cpu_info.cc
+++ b/paddle/fluid/platform/cpu_info.cc
@@ -130,6 +130,8 @@ bool MayIUse(const cpu_isa_t cpu_isa) {
     case avx512_mic_4ops:
       return true && MayIUse(avx512_mic) && cpu.has(Cpu::tAVX512_4FMAPS) &&
              cpu.has(Cpu::tAVX512_4VNNIW);
+    case avx512_bf16:
+      return true && cpu.has(Cpu::tAVX512_BF16);
     case isa_any:
       return true;
   }
@@ -173,6 +175,13 @@ bool MayIUse(const cpu_isa_t cpu_isa) {
         return ((reg[1] & avx512f_mask) && (reg[1] & avx512dq_mask) &&
                 (reg[1] & avx512bw_mask) && (reg[1] & avx512vl_mask));
       }
+      // EAX = 7, ECX = 1
+      cpuid(reg, 0x00010007);
+      if (cpu_isa == avx512_bf16) {
+        // AVX512BF16: EAX Bit 5
+        int avx512bf16_mask = (1 << 5);
+        return (reg[0] & avx512bf16_mask) != 0;
+      }
     }
 #endif
     return false;
diff --git a/paddle/fluid/platform/cpu_info.h b/paddle/fluid/platform/cpu_info.h
index 3c74e6fb2acb0a15cb2e034f1bcb2871578eae01..94527149d4e0b459dee03375d56fb0a9526aa055 100644
--- a/paddle/fluid/platform/cpu_info.h
+++ b/paddle/fluid/platform/cpu_info.h
@@ -83,6 +83,7 @@ typedef enum {
   avx512_core_vnni,
   avx512_mic,
   avx512_mic_4ops,
+  avx512_bf16,
 } cpu_isa_t;  // Instruction set architecture
 
 // May I use some instruction
diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc
index 03a21b29921de17799da580e31130c5ca9134729..745bda49ecfa09d9130abbd9f92732b0b45bf01f 100644
--- a/paddle/fluid/pybind/pybind.cc
+++ b/paddle/fluid/pybind/pybind.cc
@@ -161,6 +161,17 @@ bool SupportsBfloat16() {
 #endif
 }
 
+bool SupportsBfloat16FastPerformance() {
+#ifndef PADDLE_WITH_MKLDNN
+  return false;
+#else
+  if (platform::MayIUse(platform::cpu_isa_t::avx512_bf16))
+    return true;
+  else
+    return false;
+#endif
+}
+
 bool IsCompiledWithBrpc() {
 #ifndef PADDLE_WITH_DISTRIBUTE
   return false;
@@ -1730,6 +1741,7 @@ All parameter, weight, gradient are variables in Paddle.
m.def("is_compiled_with_xpu", IsCompiledWithXPU); m.def("is_compiled_with_mkldnn", IsCompiledWithMKLDNN); m.def("supports_bfloat16", SupportsBfloat16); + m.def("supports_bfloat16_fast_performance", SupportsBfloat16FastPerformance); m.def("is_compiled_with_brpc", IsCompiledWithBrpc); m.def("is_compiled_with_dist", IsCompiledWithDIST); m.def("_cuda_synchronize", [](const platform::CUDAPlace &place) {