From 02083bda40eb02d80f14c418ba069d2dd4a5a247 Mon Sep 17 00:00:00 2001 From: "joanna.wozna.intel" Date: Fri, 28 Aug 2020 04:45:33 +0200 Subject: [PATCH] Add mkldnn bfloat16 option to C-API (#26676) * Add mkldnn bfloat16 option to C-API * Add test for bfloat16 gpu * Change coverage test --- go/paddle/config.go | 7 +++++++ paddle/fluid/inference/api/analysis_config.cc | 18 ++++++++++++++++ .../api/analysis_predictor_tester.cc | 21 +++++++++++++++++++ .../inference/api/paddle_analysis_config.h | 14 +++++++++++++ .../inference/api/paddle_pass_builder.cc | 12 +++++++++++ .../fluid/inference/api/paddle_pass_builder.h | 11 ++++++++++ paddle/fluid/inference/capi/paddle_c_api.h | 6 ++++++ paddle/fluid/inference/capi/pd_config.cc | 12 +++++++++++ .../tests/api/analyzer_capi_gpu_tester.cc | 3 +++ .../tests/api/analyzer_capi_tester.cc | 3 +++ paddle/fluid/pybind/inference_api.cc | 8 +++++-- 11 files changed, 113 insertions(+), 2 deletions(-) diff --git a/go/paddle/config.go b/go/paddle/config.go index cea69e716b..c4f39fa9c5 100644 --- a/go/paddle/config.go +++ b/go/paddle/config.go @@ -154,10 +154,17 @@ func (config *AnalysisConfig) EnableMkldnnQuantizer() { C.PD_EnableMkldnnQuantizer(config.c) } +func (config *AnalysisConfig) EnableMkldnnBfloat16() { + C.PD_EnableMkldnnBfloat16(config.c) +} + func (config *AnalysisConfig) MkldnnQuantizerEnabled() bool { return ConvertCBooleanToGo(C.PD_MkldnnQuantizerEnabled(config.c)) } +func (config *AnalysisConfig) MkldnnBfloat16Enabled() bool { + return ConvertCBooleanToGo(C.PD_MkldnnBfloat16Enabled(config.c)) +} // SetModelBuffer // ModelFromMemory diff --git a/paddle/fluid/inference/api/analysis_config.cc b/paddle/fluid/inference/api/analysis_config.cc index 61886c225e..a1c1e6de5f 100644 --- a/paddle/fluid/inference/api/analysis_config.cc +++ b/paddle/fluid/inference/api/analysis_config.cc @@ -218,6 +218,17 @@ void AnalysisConfig::EnableMkldnnQuantizer() { Update(); } +void AnalysisConfig::EnableMkldnnBfloat16() { +#ifdef PADDLE_WITH_MKLDNN + use_mkldnn_bfloat16_ = true; +#else + LOG(ERROR) << "Please compile with MKLDNN first to use MkldnnBfloat16"; + use_mkldnn_bfloat16_ = false; +#endif + + Update(); +} + MkldnnQuantizerConfig *AnalysisConfig::mkldnn_quantizer_config() const { PADDLE_ENFORCE_NOT_NULL(mkldnn_quantizer_config_, "MkldnnQuantizer was not enabled yet."); @@ -331,6 +342,12 @@ void AnalysisConfig::Update() { #endif } + if (use_mkldnn_bfloat16_) { +#ifdef PADDLE_WITH_MKLDNN + pass_builder()->EnableMkldnnBfloat16(); +#endif + } + #ifdef PADDLE_WITH_MKLDNN // Do not optimize when mkldnn is on if (enable_memory_optim_ && !use_mkldnn_) { @@ -399,6 +416,7 @@ std::string AnalysisConfig::SerializeInfoCache() { ss << ";"; ss << use_mkldnn_quantizer_; + ss << use_mkldnn_bfloat16_; ss << model_from_memory_; ss << with_profile_; diff --git a/paddle/fluid/inference/api/analysis_predictor_tester.cc b/paddle/fluid/inference/api/analysis_predictor_tester.cc index dea448f9b0..5766919f08 100644 --- a/paddle/fluid/inference/api/analysis_predictor_tester.cc +++ b/paddle/fluid/inference/api/analysis_predictor_tester.cc @@ -485,4 +485,25 @@ TEST_F(MkldnnQuantizerTest, kl_scaling_factor_unsigned) { } #endif +#ifdef PADDLE_WITH_CUDA +TEST(AnalysisPredictor, bf16_gpu_pass_strategy) { + AnalysisConfig config; + config.SetModel(FLAGS_dirname); + config.SwitchIrOptim(true); + config.EnableUseGpu(100, 0); + config.EnableMkldnnBfloat16(); +#ifdef PADDLE_WITH_MKLDNN + ASSERT_EQ(config.mkldnn_bfloat16_enabled(), true); +#else + ASSERT_EQ(config.mkldnn_bfloat16_enabled(), false); +#endif +} +#endif + +TEST(AnalysisPredictor, bf16_pass_strategy) { + std::vector passes; + PassStrategy passStrategy(passes); + passStrategy.EnableMkldnnBfloat16(); +} + } // namespace paddle diff --git a/paddle/fluid/inference/api/paddle_analysis_config.h b/paddle/fluid/inference/api/paddle_analysis_config.h index 6a31ff281c..b1244e4e3d 100644 --- a/paddle/fluid/inference/api/paddle_analysis_config.h +++ b/paddle/fluid/inference/api/paddle_analysis_config.h @@ -401,6 +401,19 @@ struct PD_INFER_DECL AnalysisConfig { /// void EnableMkldnnQuantizer(); + /// + /// \brief Turn on MKLDNN bfloat16. + /// + /// + void EnableMkldnnBfloat16(); + + /// + /// \brief A boolean state telling whether to use the MKLDNN Bfloat16. + /// + /// \return bool Whether to use the MKLDNN Bfloat16. + /// + bool mkldnn_bfloat16_enabled() const { return use_mkldnn_bfloat16_; } + /// /// \brief A boolean state telling whether the thread local CUDA stream is /// enabled. @@ -592,6 +605,7 @@ struct PD_INFER_DECL AnalysisConfig { int mkldnn_cache_capacity_{0}; bool use_mkldnn_quantizer_{false}; std::shared_ptr mkldnn_quantizer_config_; + bool use_mkldnn_bfloat16_{false}; // If the config is already used on a predictor, it becomes invalid. // Any config can only be used with one predictor. diff --git a/paddle/fluid/inference/api/paddle_pass_builder.cc b/paddle/fluid/inference/api/paddle_pass_builder.cc index c07ac11e27..ffb70700b5 100644 --- a/paddle/fluid/inference/api/paddle_pass_builder.cc +++ b/paddle/fluid/inference/api/paddle_pass_builder.cc @@ -143,6 +143,10 @@ void GpuPassStrategy::EnableMkldnnQuantizer() { LOG(ERROR) << "GPU not support MKL-DNN quantization"; } +void GpuPassStrategy::EnableMkldnnBfloat16() { + LOG(ERROR) << "GPU not support MKL-DNN bfloat16"; +} + CpuPassStrategy::CpuPassStrategy() : PassStrategy({}) { // NOTE the large fusions should be located in the front, so that they will // not be damaged by smaller ones. @@ -223,4 +227,12 @@ void CpuPassStrategy::EnableMkldnnQuantizer() { #endif } +void CpuPassStrategy::EnableMkldnnBfloat16() { +#ifdef PADDLE_WITH_MKLDNN + use_mkldnn_bfloat16_ = true; +#else + use_mkldnn_bfloat16_ = false; +#endif +} + } // namespace paddle diff --git a/paddle/fluid/inference/api/paddle_pass_builder.h b/paddle/fluid/inference/api/paddle_pass_builder.h index c5a4a5f754..9073253520 100644 --- a/paddle/fluid/inference/api/paddle_pass_builder.h +++ b/paddle/fluid/inference/api/paddle_pass_builder.h @@ -132,6 +132,9 @@ class PD_INFER_DECL PassStrategy : public PaddlePassBuilder { /// \brief Enable MKLDNN quantize optimization. virtual void EnableMkldnnQuantizer() {} + /// \brief Enable MKLDNN bfloat16. + virtual void EnableMkldnnBfloat16() {} + /// \brief Check if we are using gpu. /// \return A bool variable implying whether we are in gpu mode. bool use_gpu() const { return use_gpu_; } @@ -161,6 +164,7 @@ class PD_INFER_DECL CpuPassStrategy : public PassStrategy { use_gpu_ = other.use_gpu_; use_mkldnn_ = other.use_mkldnn_; use_mkldnn_quantizer_ = other.use_mkldnn_quantizer_; + use_mkldnn_bfloat16_ = other.use_mkldnn_bfloat16_; } /// \brief Default destructor. virtual ~CpuPassStrategy() = default; @@ -174,9 +178,13 @@ class PD_INFER_DECL CpuPassStrategy : public PassStrategy { /// \brief Enable MKLDNN quantize optimization. void EnableMkldnnQuantizer() override; + /// \brief Enable MKLDNN bfloat16. + void EnableMkldnnBfloat16() override; + protected: /// \cond Protected bool use_mkldnn_quantizer_{false}; + bool use_mkldnn_bfloat16_{false}; /// \endcond }; @@ -205,6 +213,9 @@ class PD_INFER_DECL GpuPassStrategy : public PassStrategy { /// \brief Not supported in GPU mode yet. void EnableMkldnnQuantizer() override; + /// \brief Not supported in GPU mode yet. + void EnableMkldnnBfloat16() override; + /// \brief Default destructor. virtual ~GpuPassStrategy() = default; diff --git a/paddle/fluid/inference/capi/paddle_c_api.h b/paddle/fluid/inference/capi/paddle_c_api.h index 4be6b48fb1..32129890d0 100644 --- a/paddle/fluid/inference/capi/paddle_c_api.h +++ b/paddle/fluid/inference/capi/paddle_c_api.h @@ -235,6 +235,12 @@ PADDLE_CAPI_EXPORT extern void PD_EnableMkldnnQuantizer( PADDLE_CAPI_EXPORT extern bool PD_MkldnnQuantizerEnabled( const PD_AnalysisConfig* config); +PADDLE_CAPI_EXPORT extern void PD_EnableMkldnnBfloat16( + PD_AnalysisConfig* config); + +PADDLE_CAPI_EXPORT extern bool PD_MkldnnBfloat16Enabled( + const PD_AnalysisConfig* config); + PADDLE_CAPI_EXPORT extern void PD_SetModelBuffer(PD_AnalysisConfig* config, const char* prog_buffer, size_t prog_buffer_size, diff --git a/paddle/fluid/inference/capi/pd_config.cc b/paddle/fluid/inference/capi/pd_config.cc index f5445dd5a3..b99abc06b2 100644 --- a/paddle/fluid/inference/capi/pd_config.cc +++ b/paddle/fluid/inference/capi/pd_config.cc @@ -207,6 +207,18 @@ bool PD_MkldnnQuantizerEnabled(const PD_AnalysisConfig* config) { return config->config.mkldnn_quantizer_enabled(); } +void PD_EnableMkldnnBfloat16(PD_AnalysisConfig* config) { + PADDLE_ENFORCE_NOT_NULL(config, paddle::platform::errors::NotFound( + "PD_AnalysisConfig should not be null")); + config->config.EnableMkldnnBfloat16(); +} + +bool PD_MkldnnBfloat16Enabled(const PD_AnalysisConfig* config) { + PADDLE_ENFORCE_NOT_NULL(config, paddle::platform::errors::NotFound( + "PD_AnalysisConfig should not be null")); + return config->config.mkldnn_bfloat16_enabled(); +} + void PD_SetModelBuffer(PD_AnalysisConfig* config, const char* prog_buffer, size_t prog_buffer_size, const char* params_buffer, size_t params_buffer_size) { diff --git a/paddle/fluid/inference/tests/api/analyzer_capi_gpu_tester.cc b/paddle/fluid/inference/tests/api/analyzer_capi_gpu_tester.cc index c60e0a25f2..da0c93d21b 100644 --- a/paddle/fluid/inference/tests/api/analyzer_capi_gpu_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_capi_gpu_tester.cc @@ -54,6 +54,9 @@ TEST(PD_AnalysisConfig, use_gpu) { PD_SwitchIrOptim(config, true); bool ir_optim = PD_IrOptim(config); CHECK(ir_optim) << "NO"; + PD_EnableMkldnnBfloat16(config); + bool bfloat16_enable = PD_MkldnnBfloat16Enabled(config); + CHECK(!bfloat16_enable) << "NO"; PD_EnableTensorRtEngine(config, 1 << 20, 1, 3, Precision::kFloat32, false, false); bool trt_enable = PD_TensorrtEngineEnabled(config); diff --git a/paddle/fluid/inference/tests/api/analyzer_capi_tester.cc b/paddle/fluid/inference/tests/api/analyzer_capi_tester.cc index 93fcb43447..e24706691e 100644 --- a/paddle/fluid/inference/tests/api/analyzer_capi_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_capi_tester.cc @@ -88,6 +88,9 @@ TEST(PD_AnalysisConfig, profile_mkldnn) { PD_EnableMkldnnQuantizer(config); bool quantizer_enable = PD_MkldnnQuantizerEnabled(config); CHECK(quantizer_enable) << "NO"; + PD_EnableMkldnnBfloat16(config); + bool bfloat16_enable = PD_MkldnnBfloat16Enabled(config); + CHECK(bfloat16_enable) << "NO"; PD_SetMkldnnCacheCapacity(config, 0); PD_SetModel(config, prog_file.c_str(), params_file.c_str()); PD_DeleteAnalysisConfig(config); diff --git a/paddle/fluid/pybind/inference_api.cc b/paddle/fluid/pybind/inference_api.cc index 696da67c9c..cf0dac022f 100644 --- a/paddle/fluid/pybind/inference_api.cc +++ b/paddle/fluid/pybind/inference_api.cc @@ -448,6 +448,7 @@ void BindAnalysisConfig(py::module *m) { &AnalysisConfig::cpu_math_library_num_threads) .def("to_native_config", &AnalysisConfig::ToNativeConfig) .def("enable_quantizer", &AnalysisConfig::EnableMkldnnQuantizer) + .def("enable_mkldnn_bfloat16", &AnalysisConfig::EnableMkldnnBfloat16) #ifdef PADDLE_WITH_MKLDNN .def("quantizer_config", &AnalysisConfig::mkldnn_quantizer_config, py::return_value_policy::reference) @@ -565,6 +566,7 @@ void BindPaddlePassBuilder(py::module *m) { .def("enable_cudnn", &PassStrategy::EnableCUDNN) .def("enable_mkldnn", &PassStrategy::EnableMKLDNN) .def("enable_mkldnn_quantizer", &PassStrategy::EnableMkldnnQuantizer) + .def("enable_mkldnn_bfloat16", &PassStrategy::EnableMkldnnBfloat16) .def("use_gpu", &PassStrategy::use_gpu); py::class_(*m, "CpuPassStrategy") @@ -572,14 +574,16 @@ void BindPaddlePassBuilder(py::module *m) { .def(py::init()) .def("enable_cudnn", &CpuPassStrategy::EnableCUDNN) .def("enable_mkldnn", &CpuPassStrategy::EnableMKLDNN) - .def("enable_mkldnn_quantizer", &CpuPassStrategy::EnableMkldnnQuantizer); + .def("enable_mkldnn_quantizer", &CpuPassStrategy::EnableMkldnnQuantizer) + .def("enable_mkldnn_bfloat16", &CpuPassStrategy::EnableMkldnnBfloat16); py::class_(*m, "GpuPassStrategy") .def(py::init<>()) .def(py::init()) .def("enable_cudnn", &GpuPassStrategy::EnableCUDNN) .def("enable_mkldnn", &GpuPassStrategy::EnableMKLDNN) - .def("enable_mkldnn_quantizer", &GpuPassStrategy::EnableMkldnnQuantizer); + .def("enable_mkldnn_quantizer", &GpuPassStrategy::EnableMkldnnQuantizer) + .def("enable_mkldnn_bfloat16", &GpuPassStrategy::EnableMkldnnBfloat16); } } // namespace } // namespace pybind -- GitLab