未验证 提交 73cdea01 编写于 作者: J joanna.wozna.intel 提交者: GitHub

Add bf16 fast performance verification (#30551)

* Update Xbyak and add bf16 fast performance verification

* Fix formatting

* Change LOG message

* Trigger an update of a new tag
上级 e6095bc2
...@@ -44,7 +44,7 @@ ExternalProject_Add( ...@@ -44,7 +44,7 @@ ExternalProject_Add(
DEPENDS "" DEPENDS ""
PREFIX ${XBYAK_PREFIX_DIR} PREFIX ${XBYAK_PREFIX_DIR}
SOURCE_DIR ${XBYAK_SOURCE_DIR} SOURCE_DIR ${XBYAK_SOURCE_DIR}
UPDATE_COMMAND "" # UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${XBYAK_INSTALL_ROOT} CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${XBYAK_INSTALL_ROOT}
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${XBYAK_INSTALL_ROOT} CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${XBYAK_INSTALL_ROOT}
) )
......
...@@ -261,6 +261,10 @@ void AnalysisConfig::EnableMkldnnBfloat16() { ...@@ -261,6 +261,10 @@ void AnalysisConfig::EnableMkldnnBfloat16() {
#ifdef PADDLE_WITH_MKLDNN #ifdef PADDLE_WITH_MKLDNN
if (platform::MayIUse(platform::cpu_isa_t::avx512_core)) { if (platform::MayIUse(platform::cpu_isa_t::avx512_core)) {
use_mkldnn_bfloat16_ = true; use_mkldnn_bfloat16_ = true;
LOG(INFO) << "Hardware support for BFLOAT16"
<< (platform::MayIUse(platform::cpu_isa_t::avx512_bf16)
? " is enabled"
: " is disabled. Simulation will be used");
} else { } else {
LOG(INFO) << "CPU does not support BFLOAT16 calculations"; LOG(INFO) << "CPU does not support BFLOAT16 calculations";
use_mkldnn_bfloat16_ = false; use_mkldnn_bfloat16_ = false;
......
...@@ -130,6 +130,8 @@ bool MayIUse(const cpu_isa_t cpu_isa) { ...@@ -130,6 +130,8 @@ bool MayIUse(const cpu_isa_t cpu_isa) {
case avx512_mic_4ops: case avx512_mic_4ops:
return true && MayIUse(avx512_mic) && cpu.has(Cpu::tAVX512_4FMAPS) && return true && MayIUse(avx512_mic) && cpu.has(Cpu::tAVX512_4FMAPS) &&
cpu.has(Cpu::tAVX512_4VNNIW); cpu.has(Cpu::tAVX512_4VNNIW);
case avx512_bf16:
return true && cpu.has(Cpu::tAVX512_BF16);
case isa_any: case isa_any:
return true; return true;
} }
...@@ -173,6 +175,13 @@ bool MayIUse(const cpu_isa_t cpu_isa) { ...@@ -173,6 +175,13 @@ bool MayIUse(const cpu_isa_t cpu_isa) {
return ((reg[1] & avx512f_mask) && (reg[1] & avx512dq_mask) && return ((reg[1] & avx512f_mask) && (reg[1] & avx512dq_mask) &&
(reg[1] & avx512bw_mask) && (reg[1] & avx512vl_mask)); (reg[1] & avx512bw_mask) && (reg[1] & avx512vl_mask));
} }
// EAX = 7, ECX = 1
cpuid(reg, 0x00010007);
if (cpu_isa == avx512_bf16) {
// AVX512BF16: EAX Bit 5
int avx512bf16_mask = (1 << 5);
return (reg[0] & avx512bf16_mask) != 0;
}
} }
#endif #endif
return false; return false;
......
...@@ -83,6 +83,7 @@ typedef enum { ...@@ -83,6 +83,7 @@ typedef enum {
avx512_core_vnni, avx512_core_vnni,
avx512_mic, avx512_mic,
avx512_mic_4ops, avx512_mic_4ops,
avx512_bf16,
} cpu_isa_t; // Instruction set architecture } cpu_isa_t; // Instruction set architecture
// May I use some instruction // May I use some instruction
......
...@@ -161,6 +161,17 @@ bool SupportsBfloat16() { ...@@ -161,6 +161,17 @@ bool SupportsBfloat16() {
#endif #endif
} }
// Reports whether the avx512_bf16 instruction set is usable on this machine.
//
// Returns true only when the build defines PADDLE_WITH_MKLDNN and
// platform::MayIUse() reports avx512_bf16 as available. In builds without
// PADDLE_WITH_MKLDNN the result is always false and no CPU query is made.
bool SupportsBfloat16FastPerformance() {
#ifdef PADDLE_WITH_MKLDNN
  // MayIUse() already yields a bool, so forward its answer directly.
  return platform::MayIUse(platform::cpu_isa_t::avx512_bf16);
#else
  return false;
#endif
}
bool IsCompiledWithBrpc() { bool IsCompiledWithBrpc() {
#ifndef PADDLE_WITH_DISTRIBUTE #ifndef PADDLE_WITH_DISTRIBUTE
return false; return false;
...@@ -1730,6 +1741,7 @@ All parameter, weight, gradient are variables in Paddle. ...@@ -1730,6 +1741,7 @@ All parameter, weight, gradient are variables in Paddle.
m.def("is_compiled_with_xpu", IsCompiledWithXPU); m.def("is_compiled_with_xpu", IsCompiledWithXPU);
m.def("is_compiled_with_mkldnn", IsCompiledWithMKLDNN); m.def("is_compiled_with_mkldnn", IsCompiledWithMKLDNN);
m.def("supports_bfloat16", SupportsBfloat16); m.def("supports_bfloat16", SupportsBfloat16);
m.def("supports_bfloat16_fast_performance", SupportsBfloat16FastPerformance);
m.def("is_compiled_with_brpc", IsCompiledWithBrpc); m.def("is_compiled_with_brpc", IsCompiledWithBrpc);
m.def("is_compiled_with_dist", IsCompiledWithDIST); m.def("is_compiled_with_dist", IsCompiledWithDIST);
m.def("_cuda_synchronize", [](const platform::CUDAPlace &place) { m.def("_cuda_synchronize", [](const platform::CUDAPlace &place) {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册