From 73cdea01d49673ec3b759831930e38f8024f2e6d Mon Sep 17 00:00:00 2001
From: "joanna.wozna.intel"
Date: Thu, 4 Feb 2021 14:00:37 +0100
Subject: [PATCH] Add bf16 fast performance verification (#30551)

* Update Xbyak and add bf16 fast performance verification

* Fix formating

* Change LOG message

* Trigger an update of a new tag
---
 cmake/external/xbyak.cmake                    |  2 +-
 paddle/fluid/inference/api/analysis_config.cc |  4 ++++
 paddle/fluid/platform/cpu_info.cc             |  9 +++++++++
 paddle/fluid/platform/cpu_info.h              |  1 +
 paddle/fluid/pybind/pybind.cc                 | 12 ++++++++++++
 5 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/cmake/external/xbyak.cmake b/cmake/external/xbyak.cmake
index 7d49322682..610a692ef1 100644
--- a/cmake/external/xbyak.cmake
+++ b/cmake/external/xbyak.cmake
@@ -44,7 +44,7 @@ ExternalProject_Add(
   DEPENDS ""
   PREFIX ${XBYAK_PREFIX_DIR}
   SOURCE_DIR ${XBYAK_SOURCE_DIR}
-  UPDATE_COMMAND ""
+  # UPDATE_COMMAND ""
   CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${XBYAK_INSTALL_ROOT}
   CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${XBYAK_INSTALL_ROOT}
 )
diff --git a/paddle/fluid/inference/api/analysis_config.cc b/paddle/fluid/inference/api/analysis_config.cc
index 167d083f3d..7eb1bb1a24 100644
--- a/paddle/fluid/inference/api/analysis_config.cc
+++ b/paddle/fluid/inference/api/analysis_config.cc
@@ -261,6 +261,10 @@ void AnalysisConfig::EnableMkldnnBfloat16() {
 #ifdef PADDLE_WITH_MKLDNN
   if (platform::MayIUse(platform::cpu_isa_t::avx512_core)) {
     use_mkldnn_bfloat16_ = true;
+    LOG(INFO) << "Hardware support for BFLOAT16"
+              << (platform::MayIUse(platform::cpu_isa_t::avx512_bf16)
+                      ? " is enabled"
+                      : " is disabled. Simulation will be used");
   } else {
     LOG(INFO) << "CPU does not support BFLOAT16 calculations";
     use_mkldnn_bfloat16_ = false;
diff --git a/paddle/fluid/platform/cpu_info.cc b/paddle/fluid/platform/cpu_info.cc
index b6d42f1c79..923c97350e 100644
--- a/paddle/fluid/platform/cpu_info.cc
+++ b/paddle/fluid/platform/cpu_info.cc
@@ -130,6 +130,8 @@ bool MayIUse(const cpu_isa_t cpu_isa) {
     case avx512_mic_4ops:
       return true && MayIUse(avx512_mic) && cpu.has(Cpu::tAVX512_4FMAPS) &&
              cpu.has(Cpu::tAVX512_4VNNIW);
+    case avx512_bf16:
+      return true && cpu.has(Cpu::tAVX512_BF16);
     case isa_any:
       return true;
   }
@@ -173,6 +175,13 @@ bool MayIUse(const cpu_isa_t cpu_isa) {
       return ((reg[1] & avx512f_mask) && (reg[1] & avx512dq_mask) &&
               (reg[1] & avx512bw_mask) && (reg[1] & avx512vl_mask));
     }
+    // EAX = 7, ECX = 1
+    cpuid(reg, 0x00010007);
+    if (cpu_isa == avx512_bf16) {
+      // AVX512BF16: EAX Bit 5
+      int avx512bf16_mask = (1 << 5);
+      return (reg[0] & avx512bf16_mask) != 0;
+    }
   }
 #endif
   return false;
diff --git a/paddle/fluid/platform/cpu_info.h b/paddle/fluid/platform/cpu_info.h
index 3c74e6fb2a..94527149d4 100644
--- a/paddle/fluid/platform/cpu_info.h
+++ b/paddle/fluid/platform/cpu_info.h
@@ -83,6 +83,7 @@ typedef enum {
   avx512_core_vnni,
   avx512_mic,
   avx512_mic_4ops,
+  avx512_bf16,
 } cpu_isa_t;  // Instruction set architecture

 // May I use some instruction
diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc
index 03a21b2992..745bda49ec 100644
--- a/paddle/fluid/pybind/pybind.cc
+++ b/paddle/fluid/pybind/pybind.cc
@@ -161,6 +161,17 @@ bool SupportsBfloat16() {
 #endif
 }

+bool SupportsBfloat16FastPerformance() {
+#ifndef PADDLE_WITH_MKLDNN
+  return false;
+#else
+  if (platform::MayIUse(platform::cpu_isa_t::avx512_bf16))
+    return true;
+  else
+    return false;
+#endif
+}
+
 bool IsCompiledWithBrpc() {
 #ifndef PADDLE_WITH_DISTRIBUTE
   return false;
@@ -1730,6 +1741,7 @@ All parameter, weight, gradient are variables in Paddle.
   m.def("is_compiled_with_xpu", IsCompiledWithXPU);
   m.def("is_compiled_with_mkldnn", IsCompiledWithMKLDNN);
   m.def("supports_bfloat16", SupportsBfloat16);
+  m.def("supports_bfloat16_fast_performance", SupportsBfloat16FastPerformance);
   m.def("is_compiled_with_brpc", IsCompiledWithBrpc);
   m.def("is_compiled_with_dist", IsCompiledWithDIST);
   m.def("_cuda_synchronize", [](const platform::CUDAPlace &place) {
-- 
GitLab