diff --git a/go/paddle/config.go b/go/paddle/config.go
index c4f39fa9c5d627a689c064bbbd2178cd1ae1a929..cea69e716bffada9e5565eacf8ac1af84ae5b930 100644
--- a/go/paddle/config.go
+++ b/go/paddle/config.go
@@ -154,17 +154,10 @@ func (config *AnalysisConfig) EnableMkldnnQuantizer() {
 	C.PD_EnableMkldnnQuantizer(config.c)
 }
 
-func (config *AnalysisConfig) EnableMkldnnBfloat16() {
-	C.PD_EnableMkldnnBfloat16(config.c)
-}
-
 func (config *AnalysisConfig) MkldnnQuantizerEnabled() bool {
 	return ConvertCBooleanToGo(C.PD_MkldnnQuantizerEnabled(config.c))
 }
 
-func (config *AnalysisConfig) MkldnnBfloat16Enabled() bool {
-	return ConvertCBooleanToGo(C.PD_MkldnnBfloat16Enabled(config.c))
-}
-
 // SetModelBuffer
 // ModelFromMemory
diff --git a/paddle/fluid/inference/api/analysis_config.cc b/paddle/fluid/inference/api/analysis_config.cc
index 9fbc97d55090345af3b3b12bcd138bfaecd346cc..bb01e7009a56ca0fc36177704547a5ecdadbd9fb 100644
--- a/paddle/fluid/inference/api/analysis_config.cc
+++ b/paddle/fluid/inference/api/analysis_config.cc
@@ -217,17 +217,6 @@ void AnalysisConfig::EnableMkldnnQuantizer() {
   Update();
 }
 
-void AnalysisConfig::EnableMkldnnBfloat16() {
-#ifdef PADDLE_WITH_MKLDNN
-  use_mkldnn_bfloat16_ = true;
-#else
-  LOG(ERROR) << "Please compile with MKLDNN first to use MkldnnBfloat16";
-  use_mkldnn_bfloat16_ = false;
-#endif
-
-  Update();
-}
-
 MkldnnQuantizerConfig *AnalysisConfig::mkldnn_quantizer_config() const {
   PADDLE_ENFORCE_NOT_NULL(mkldnn_quantizer_config_,
                           "MkldnnQuantizer was not enabled yet.");
@@ -341,12 +330,6 @@ void AnalysisConfig::Update() {
 #endif
   }
 
-  if (use_mkldnn_bfloat16_) {
-#ifdef PADDLE_WITH_MKLDNN
-    pass_builder()->EnableMkldnnBfloat16();
-#endif
-  }
-
 #ifdef PADDLE_WITH_MKLDNN
   // Do not optimize when mkldnn is on
   if (enable_memory_optim_ && !use_mkldnn_) {
@@ -415,7 +398,6 @@ std::string AnalysisConfig::SerializeInfoCache() {
   ss << ";";
 
   ss << use_mkldnn_quantizer_;
-  ss << use_mkldnn_bfloat16_;
   ss << model_from_memory_;
 
   ss << with_profile_;
diff --git a/paddle/fluid/inference/api/analysis_predictor_tester.cc b/paddle/fluid/inference/api/analysis_predictor_tester.cc
index 5766919f08e68832886b88b867bc48afa288a955..dea448f9b03468eabda16d4375ea60348a09efb2 100644
--- a/paddle/fluid/inference/api/analysis_predictor_tester.cc
+++ b/paddle/fluid/inference/api/analysis_predictor_tester.cc
@@ -485,25 +485,4 @@ TEST_F(MkldnnQuantizerTest, kl_scaling_factor_unsigned) {
 }
 #endif
 
-#ifdef PADDLE_WITH_CUDA
-TEST(AnalysisPredictor, bf16_gpu_pass_strategy) {
-  AnalysisConfig config;
-  config.SetModel(FLAGS_dirname);
-  config.SwitchIrOptim(true);
-  config.EnableUseGpu(100, 0);
-  config.EnableMkldnnBfloat16();
-#ifdef PADDLE_WITH_MKLDNN
-  ASSERT_EQ(config.mkldnn_bfloat16_enabled(), true);
-#else
-  ASSERT_EQ(config.mkldnn_bfloat16_enabled(), false);
-#endif
-}
-#endif
-
-TEST(AnalysisPredictor, bf16_pass_strategy) {
-  std::vector<std::string> passes;
-  PassStrategy passStrategy(passes);
-  passStrategy.EnableMkldnnBfloat16();
-}
-
 }  // namespace paddle
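The three files above remove the feature end to end: the Go wrapper, the core `AnalysisConfig` implementation, and its tests. For reviewer context, here is a minimal standalone sketch of the toggle semantics being deleted, reconstructed from the removed `analysis_config.cc` lines; `AnalysisConfigSketch` is an illustrative stand-in, not Paddle's real class:

```cpp
// Standalone sketch of the removed toggle: the setter takes effect only in
// MKLDNN builds; otherwise it logs and leaves the flag off.
#include <iostream>

class AnalysisConfigSketch {
 public:
  void EnableMkldnnBfloat16() {
#ifdef PADDLE_WITH_MKLDNN
    use_mkldnn_bfloat16_ = true;   // honored only when built with MKLDNN
#else
    std::cerr << "Please compile with MKLDNN first to use MkldnnBfloat16\n";
    use_mkldnn_bfloat16_ = false;  // silently stays off otherwise
#endif
  }
  bool mkldnn_bfloat16_enabled() const { return use_mkldnn_bfloat16_; }

 private:
  bool use_mkldnn_bfloat16_{false};
};

int main() {
  AnalysisConfigSketch config;
  config.EnableMkldnnBfloat16();
  // Prints "true" only when compiled with -DPADDLE_WITH_MKLDNN, which is
  // exactly what the deleted bf16_gpu_pass_strategy test asserted.
  std::cout << std::boolalpha << config.mkldnn_bfloat16_enabled() << "\n";
}
```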
diff --git a/paddle/fluid/inference/api/paddle_analysis_config.h b/paddle/fluid/inference/api/paddle_analysis_config.h
index b1244e4e3dfdd5e6a627054250e6def2a7c35a89..6a31ff281c68e3675d35c14059a453455ef398df 100644
--- a/paddle/fluid/inference/api/paddle_analysis_config.h
+++ b/paddle/fluid/inference/api/paddle_analysis_config.h
@@ -401,19 +401,6 @@ struct PD_INFER_DECL AnalysisConfig {
   ///
   void EnableMkldnnQuantizer();
 
-  ///
-  /// \brief Turn on MKLDNN bfloat16.
-  ///
-  ///
-  void EnableMkldnnBfloat16();
-
-  ///
-  /// \brief A boolean state telling whether to use the MKLDNN Bfloat16.
-  ///
-  /// \return bool Whether to use the MKLDNN Bfloat16.
-  ///
-  bool mkldnn_bfloat16_enabled() const { return use_mkldnn_bfloat16_; }
-
   ///
   /// \brief A boolean state telling whether the thread local CUDA stream is
   /// enabled.
@@ -605,7 +592,6 @@ struct PD_INFER_DECL AnalysisConfig {
   int mkldnn_cache_capacity_{0};
   bool use_mkldnn_quantizer_{false};
   std::shared_ptr<MkldnnQuantizerConfig> mkldnn_quantizer_config_;
-  bool use_mkldnn_bfloat16_{false};
 
   // If the config is already used on a predictor, it becomes invalid.
   // Any config can only be used with one predictor.
diff --git a/paddle/fluid/inference/api/paddle_pass_builder.cc b/paddle/fluid/inference/api/paddle_pass_builder.cc
index 98a36a3308dc539ee5aecad9e71f50be310e584c..43d0a2a9d0ad3ebf88feaae8df6208dc109f0b41 100644
--- a/paddle/fluid/inference/api/paddle_pass_builder.cc
+++ b/paddle/fluid/inference/api/paddle_pass_builder.cc
@@ -143,10 +143,6 @@ void GpuPassStrategy::EnableMkldnnQuantizer() {
   LOG(ERROR) << "GPU not support MKL-DNN quantization";
 }
 
-void GpuPassStrategy::EnableMkldnnBfloat16() {
-  LOG(ERROR) << "GPU not support MKL-DNN bfloat16";
-}
-
 CpuPassStrategy::CpuPassStrategy() : PassStrategy({}) {
   // NOTE the large fusions should be located in the front, so that they will
   // not be damaged by smaller ones.
@@ -229,12 +225,4 @@ void CpuPassStrategy::EnableMkldnnQuantizer() {
 #endif
 }
 
-void CpuPassStrategy::EnableMkldnnBfloat16() {
-#ifdef PADDLE_WITH_MKLDNN
-  use_mkldnn_bfloat16_ = true;
-#else
-  use_mkldnn_bfloat16_ = false;
-#endif
-}
-
 }  // namespace paddle
diff --git a/paddle/fluid/inference/api/paddle_pass_builder.h b/paddle/fluid/inference/api/paddle_pass_builder.h
index 9073253520466a3711089bc7b7da04a9191e0a42..c5a4a5f754d031a8e8f88a96dd16c89fbe1b0fbb 100644
--- a/paddle/fluid/inference/api/paddle_pass_builder.h
+++ b/paddle/fluid/inference/api/paddle_pass_builder.h
@@ -132,9 +132,6 @@ class PD_INFER_DECL PassStrategy : public PaddlePassBuilder {
   /// \brief Enable MKLDNN quantize optimization.
   virtual void EnableMkldnnQuantizer() {}
 
-  /// \brief Enable MKLDNN bfloat16.
-  virtual void EnableMkldnnBfloat16() {}
-
   /// \brief Check if we are using gpu.
   /// \return A bool variable implying whether we are in gpu mode.
   bool use_gpu() const { return use_gpu_; }
@@ -164,7 +161,6 @@ class PD_INFER_DECL CpuPassStrategy : public PassStrategy {
     use_gpu_ = other.use_gpu_;
     use_mkldnn_ = other.use_mkldnn_;
     use_mkldnn_quantizer_ = other.use_mkldnn_quantizer_;
-    use_mkldnn_bfloat16_ = other.use_mkldnn_bfloat16_;
   }
   /// \brief Default destructor.
   virtual ~CpuPassStrategy() = default;
@@ -178,13 +174,9 @@ class PD_INFER_DECL CpuPassStrategy : public PassStrategy {
   /// \brief Enable MKLDNN quantize optimization.
   void EnableMkldnnQuantizer() override;
 
-  /// \brief Enable MKLDNN bfloat16.
-  void EnableMkldnnBfloat16() override;
-
  protected:
   /// \cond Protected
   bool use_mkldnn_quantizer_{false};
-  bool use_mkldnn_bfloat16_{false};
   /// \endcond
 };
 
@@ -213,9 +205,6 @@ class PD_INFER_DECL GpuPassStrategy : public PassStrategy {
   /// \brief Not supported in GPU mode yet.
   void EnableMkldnnQuantizer() override;
 
-  /// \brief Not supported in GPU mode yet.
-  void EnableMkldnnBfloat16() override;
-
   /// \brief Default destructor.
   virtual ~GpuPassStrategy() = default;
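The pass-builder changes above drop a virtual hook with per-backend overrides. A self-contained sketch of that removed pattern, with stand-in class names for `PassStrategy`/`CpuPassStrategy`/`GpuPassStrategy`, looks like this: the base class defaults to a no-op, the CPU strategy honors the flag only in MKLDNN builds, and the GPU strategy merely reports that bfloat16 is unsupported.

```cpp
#include <iostream>

class PassStrategySketch {
 public:
  virtual ~PassStrategySketch() = default;
  virtual void EnableMkldnnBfloat16() {}  // base default: do nothing
};

class CpuPassStrategySketch : public PassStrategySketch {
 public:
  void EnableMkldnnBfloat16() override {
#ifdef PADDLE_WITH_MKLDNN
    use_mkldnn_bfloat16_ = true;   // only effective in MKLDNN builds
#else
    use_mkldnn_bfloat16_ = false;  // no MKLDNN: flag stays off
#endif
  }

 protected:
  bool use_mkldnn_bfloat16_{false};
};

class GpuPassStrategySketch : public PassStrategySketch {
 public:
  void EnableMkldnnBfloat16() override {
    // GPU path only reported lack of support, mirroring the removed override.
    std::cerr << "GPU does not support MKL-DNN bfloat16\n";
  }
};
```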
diff --git a/paddle/fluid/inference/capi/paddle_c_api.h b/paddle/fluid/inference/capi/paddle_c_api.h
index 32129890d02a2a0e0b357a6e0402d07b56bc6509..4be6b48fb1820dc3271de164e87387c73ee67da9 100644
--- a/paddle/fluid/inference/capi/paddle_c_api.h
+++ b/paddle/fluid/inference/capi/paddle_c_api.h
@@ -235,12 +235,6 @@ PADDLE_CAPI_EXPORT extern void PD_EnableMkldnnQuantizer(
 PADDLE_CAPI_EXPORT extern bool PD_MkldnnQuantizerEnabled(
     const PD_AnalysisConfig* config);
 
-PADDLE_CAPI_EXPORT extern void PD_EnableMkldnnBfloat16(
-    PD_AnalysisConfig* config);
-
-PADDLE_CAPI_EXPORT extern bool PD_MkldnnBfloat16Enabled(
-    const PD_AnalysisConfig* config);
-
 PADDLE_CAPI_EXPORT extern void PD_SetModelBuffer(PD_AnalysisConfig* config,
                                                  const char* prog_buffer,
                                                  size_t prog_buffer_size,
diff --git a/paddle/fluid/inference/capi/pd_config.cc b/paddle/fluid/inference/capi/pd_config.cc
index b99abc06b27ecb9686b4c6e883aaaf8b3e592415..f5445dd5a3f9b6499045361a36fd6363a79ef560 100644
--- a/paddle/fluid/inference/capi/pd_config.cc
+++ b/paddle/fluid/inference/capi/pd_config.cc
@@ -207,18 +207,6 @@ bool PD_MkldnnQuantizerEnabled(const PD_AnalysisConfig* config) {
   return config->config.mkldnn_quantizer_enabled();
 }
 
-void PD_EnableMkldnnBfloat16(PD_AnalysisConfig* config) {
-  PADDLE_ENFORCE_NOT_NULL(config, paddle::platform::errors::NotFound(
-                                      "PD_AnalysisConfig should not be null"));
-  config->config.EnableMkldnnBfloat16();
-}
-
-bool PD_MkldnnBfloat16Enabled(const PD_AnalysisConfig* config) {
-  PADDLE_ENFORCE_NOT_NULL(config, paddle::platform::errors::NotFound(
-                                      "PD_AnalysisConfig should not be null"));
-  return config->config.mkldnn_bfloat16_enabled();
-}
-
 void PD_SetModelBuffer(PD_AnalysisConfig* config, const char* prog_buffer,
                        size_t prog_buffer_size, const char* params_buffer,
                        size_t params_buffer_size) {
diff --git a/paddle/fluid/inference/tests/api/analyzer_capi_gpu_tester.cc b/paddle/fluid/inference/tests/api/analyzer_capi_gpu_tester.cc
index da0c93d21b7852e06b6805230078540063c2b243..c60e0a25f28c01c453276a8ef04eb79b35b7dda2 100644
--- a/paddle/fluid/inference/tests/api/analyzer_capi_gpu_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_capi_gpu_tester.cc
@@ -54,9 +54,6 @@ TEST(PD_AnalysisConfig, use_gpu) {
   PD_SwitchIrOptim(config, true);
   bool ir_optim = PD_IrOptim(config);
   CHECK(ir_optim) << "NO";
-  PD_EnableMkldnnBfloat16(config);
-  bool bfloat16_enable = PD_MkldnnBfloat16Enabled(config);
-  CHECK(!bfloat16_enable) << "NO";
   PD_EnableTensorRtEngine(config, 1 << 20, 1, 3, Precision::kFloat32, false,
                           false);
   bool trt_enable = PD_TensorrtEngineEnabled(config);
diff --git a/paddle/fluid/inference/tests/api/analyzer_capi_tester.cc b/paddle/fluid/inference/tests/api/analyzer_capi_tester.cc
index e24706691ed834ac4f49d924162035ec565d24ea..93fcb43447d01dcafa10d8c85234d243d5095d4e 100644
--- a/paddle/fluid/inference/tests/api/analyzer_capi_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_capi_tester.cc
@@ -88,9 +88,6 @@ TEST(PD_AnalysisConfig, profile_mkldnn) {
   PD_EnableMkldnnQuantizer(config);
   bool quantizer_enable = PD_MkldnnQuantizerEnabled(config);
   CHECK(quantizer_enable) << "NO";
-  PD_EnableMkldnnBfloat16(config);
-  bool bfloat16_enable = PD_MkldnnBfloat16Enabled(config);
-  CHECK(bfloat16_enable) << "NO";
   PD_SetMkldnnCacheCapacity(config, 0);
   PD_SetModel(config, prog_file.c_str(), params_file.c_str());
   PD_DeleteAnalysisConfig(config);
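On the C API side, the deleted tester lines exercised the removed pair `PD_EnableMkldnnBfloat16` / `PD_MkldnnBfloat16Enabled` as sketched below. This is a reconstruction against the pre-revert tree; these symbols no longer exist after this patch, and `PD_NewAnalysisConfig` is the constructor from the same C API.

```cpp
#include "paddle/fluid/inference/capi/paddle_c_api.h"

bool CheckBfloat16Toggle() {
  PD_AnalysisConfig* config = PD_NewAnalysisConfig();
  PD_EnableMkldnnBfloat16(config);  // request bf16 through the C API
  bool bfloat16_enable = PD_MkldnnBfloat16Enabled(config);
  // profile_mkldnn expected true on MKLDNN builds; the GPU tester expected
  // false, since a CUDA-only build compiles the non-MKLDNN fallback.
  PD_DeleteAnalysisConfig(config);
  return bfloat16_enable;
}
```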
diff --git a/paddle/fluid/pybind/inference_api.cc b/paddle/fluid/pybind/inference_api.cc
index 040dd313f1c538b5792538f9da04635ff805b9a8..c6be52d1cd082316b02372613b938adc0fa9d6d8 100644
--- a/paddle/fluid/pybind/inference_api.cc
+++ b/paddle/fluid/pybind/inference_api.cc
@@ -448,7 +448,6 @@ void BindAnalysisConfig(py::module *m) {
            &AnalysisConfig::cpu_math_library_num_threads)
       .def("to_native_config", &AnalysisConfig::ToNativeConfig)
       .def("enable_quantizer", &AnalysisConfig::EnableMkldnnQuantizer)
-      .def("enable_mkldnn_bfloat16", &AnalysisConfig::EnableMkldnnBfloat16)
 #ifdef PADDLE_WITH_MKLDNN
       .def("quantizer_config", &AnalysisConfig::mkldnn_quantizer_config,
            py::return_value_policy::reference)
@@ -566,7 +565,6 @@ void BindPaddlePassBuilder(py::module *m) {
       .def("enable_cudnn", &PassStrategy::EnableCUDNN)
       .def("enable_mkldnn", &PassStrategy::EnableMKLDNN)
       .def("enable_mkldnn_quantizer", &PassStrategy::EnableMkldnnQuantizer)
-      .def("enable_mkldnn_bfloat16", &PassStrategy::EnableMkldnnBfloat16)
       .def("use_gpu", &PassStrategy::use_gpu);
 
   py::class_<CpuPassStrategy, PassStrategy>(*m, "CpuPassStrategy")
       .def(py::init<>())
       .def(py::init<const CpuPassStrategy &>())
       .def("enable_cudnn", &CpuPassStrategy::EnableCUDNN)
       .def("enable_mkldnn", &CpuPassStrategy::EnableMKLDNN)
-      .def("enable_mkldnn_quantizer", &CpuPassStrategy::EnableMkldnnQuantizer)
-      .def("enable_mkldnn_bfloat16", &CpuPassStrategy::EnableMkldnnBfloat16);
+      .def("enable_mkldnn_quantizer", &CpuPassStrategy::EnableMkldnnQuantizer);
 
   py::class_<GpuPassStrategy, PassStrategy>(*m, "GpuPassStrategy")
       .def(py::init<>())
       .def(py::init<const GpuPassStrategy &>())
       .def("enable_cudnn", &GpuPassStrategy::EnableCUDNN)
       .def("enable_mkldnn", &GpuPassStrategy::EnableMKLDNN)
-      .def("enable_mkldnn_quantizer", &GpuPassStrategy::EnableMkldnnQuantizer)
-      .def("enable_mkldnn_bfloat16", &GpuPassStrategy::EnableMkldnnBfloat16);
+      .def("enable_mkldnn_quantizer", &GpuPassStrategy::EnableMkldnnQuantizer);
 }
 }  // namespace
 }  // namespace pybind
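The pybind changes are pure method-chain deletions: each strategy class loses one `.def(...)`. For reference, the binding shape being removed reduces to the following minimal pybind11 module; `demo` and `PassStrategySketch` are illustrative names, not Paddle code, which lives in `paddle/fluid/pybind/inference_api.cc`.

```cpp
#include <pybind11/pybind11.h>

namespace py = pybind11;

struct PassStrategySketch {
  bool bf16{false};
  void EnableMkldnnBfloat16() { bf16 = true; }
};

PYBIND11_MODULE(demo, m) {
  py::class_<PassStrategySketch>(m, "PassStrategy")
      .def(py::init<>())
      // This is the kind of line the revert removes from each strategy class:
      .def("enable_mkldnn_bfloat16", &PassStrategySketch::EnableMkldnnBfloat16);
}
```

A practical consequence visible from the removed bindings: after this patch, Python code calling `config.enable_mkldnn_bfloat16()` or `strategy.enable_mkldnn_bfloat16()` fails with `AttributeError`, since the attribute is no longer registered.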