未验证 提交 73cdea01 编写于 作者: J joanna.wozna.intel 提交者: GitHub

Add bf16 fast performance verification (#30551)

* Update Xbyak and add bf16 fast performance verification

* Fix formatting

* Change LOG message

* Trigger an update of a new tag
上级 e6095bc2
......@@ -44,7 +44,7 @@ ExternalProject_Add(
DEPENDS ""
PREFIX ${XBYAK_PREFIX_DIR}
SOURCE_DIR ${XBYAK_SOURCE_DIR}
UPDATE_COMMAND ""
# UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${XBYAK_INSTALL_ROOT}
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${XBYAK_INSTALL_ROOT}
)
......
......@@ -261,6 +261,10 @@ void AnalysisConfig::EnableMkldnnBfloat16() {
#ifdef PADDLE_WITH_MKLDNN
if (platform::MayIUse(platform::cpu_isa_t::avx512_core)) {
use_mkldnn_bfloat16_ = true;
LOG(INFO) << "Hardware support for BFLOAT16"
<< (platform::MayIUse(platform::cpu_isa_t::avx512_bf16)
? " is enabled"
: " is disabled. Simulation will be used");
} else {
LOG(INFO) << "CPU does not support BFLOAT16 calculations";
use_mkldnn_bfloat16_ = false;
......
......@@ -130,6 +130,8 @@ bool MayIUse(const cpu_isa_t cpu_isa) {
case avx512_mic_4ops:
return true && MayIUse(avx512_mic) && cpu.has(Cpu::tAVX512_4FMAPS) &&
cpu.has(Cpu::tAVX512_4VNNIW);
case avx512_bf16:
return true && cpu.has(Cpu::tAVX512_BF16);
case isa_any:
return true;
}
......@@ -173,6 +175,13 @@ bool MayIUse(const cpu_isa_t cpu_isa) {
return ((reg[1] & avx512f_mask) && (reg[1] & avx512dq_mask) &&
(reg[1] & avx512bw_mask) && (reg[1] & avx512vl_mask));
}
// EAX = 7, ECX = 1
cpuid(reg, 0x00010007);
if (cpu_isa == avx512_bf16) {
// AVX512BF16: EAX Bit 5
int avx512bf16_mask = (1 << 5);
return (reg[0] & avx512bf16_mask) != 0;
}
}
#endif
return false;
......
......@@ -83,6 +83,7 @@ typedef enum {
avx512_core_vnni,
avx512_mic,
avx512_mic_4ops,
avx512_bf16,
} cpu_isa_t; // Instruction set architecture
// May I use some instruction
......
......@@ -161,6 +161,17 @@ bool SupportsBfloat16() {
#endif
}
// Reports whether platform::MayIUse says the avx512_bf16 ISA is usable on
// this CPU. Builds without PADDLE_WITH_MKLDNN always report false.
bool SupportsBfloat16FastPerformance() {
#ifdef PADDLE_WITH_MKLDNN
  // MayIUse already yields a bool, so forward its answer directly.
  return platform::MayIUse(platform::cpu_isa_t::avx512_bf16);
#else
  return false;
#endif
}
bool IsCompiledWithBrpc() {
#ifndef PADDLE_WITH_DISTRIBUTE
return false;
......@@ -1730,6 +1741,7 @@ All parameter, weight, gradient are variables in Paddle.
m.def("is_compiled_with_xpu", IsCompiledWithXPU);
m.def("is_compiled_with_mkldnn", IsCompiledWithMKLDNN);
m.def("supports_bfloat16", SupportsBfloat16);
m.def("supports_bfloat16_fast_performance", SupportsBfloat16FastPerformance);
m.def("is_compiled_with_brpc", IsCompiledWithBrpc);
m.def("is_compiled_with_dist", IsCompiledWithDIST);
m.def("_cuda_synchronize", [](const platform::CUDAPlace &place) {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册