Unverified commit 02083bda, authored by joanna.wozna.intel, committed by GitHub

Add mkldnn bfloat16 option to C-API (#26676)

* Add mkldnn bfloat16 option to C-API

* Add test for bfloat16 gpu

* Change coverage test
Parent 5f275aad
@@ -154,10 +154,17 @@ func (config *AnalysisConfig) EnableMkldnnQuantizer() {
    C.PD_EnableMkldnnQuantizer(config.c)
}

func (config *AnalysisConfig) EnableMkldnnBfloat16() {
    C.PD_EnableMkldnnBfloat16(config.c)
}

func (config *AnalysisConfig) MkldnnQuantizerEnabled() bool {
    return ConvertCBooleanToGo(C.PD_MkldnnQuantizerEnabled(config.c))
}

func (config *AnalysisConfig) MkldnnBfloat16Enabled() bool {
    return ConvertCBooleanToGo(C.PD_MkldnnBfloat16Enabled(config.c))
}

// SetModelBuffer
// ModelFromMemory
...
@@ -218,6 +218,17 @@ void AnalysisConfig::EnableMkldnnQuantizer() {
  Update();
}

void AnalysisConfig::EnableMkldnnBfloat16() {
#ifdef PADDLE_WITH_MKLDNN
  use_mkldnn_bfloat16_ = true;
#else
  LOG(ERROR) << "Please compile with MKLDNN first to use MkldnnBfloat16";
  use_mkldnn_bfloat16_ = false;
#endif

  Update();
}

MkldnnQuantizerConfig *AnalysisConfig::mkldnn_quantizer_config() const {
  PADDLE_ENFORCE_NOT_NULL(mkldnn_quantizer_config_,
                          "MkldnnQuantizer was not enabled yet.");

@@ -331,6 +342,12 @@ void AnalysisConfig::Update() {
#endif
  }

  if (use_mkldnn_bfloat16_) {
#ifdef PADDLE_WITH_MKLDNN
    pass_builder()->EnableMkldnnBfloat16();
#endif
  }

#ifdef PADDLE_WITH_MKLDNN
  // Do not optimize when mkldnn is on
  if (enable_memory_optim_ && !use_mkldnn_) {

@@ -399,6 +416,7 @@ std::string AnalysisConfig::SerializeInfoCache() {
  ss << ";";
  ss << use_mkldnn_quantizer_;
  ss << use_mkldnn_bfloat16_;
  ss << model_from_memory_;
  ss << with_profile_;
...
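
For orientation, here is a minimal C++ sketch of how the new option is used end to end. The model path is hypothetical, and SetModel()/EnableMKLDNN() are pre-existing AnalysisConfig methods, not part of this diff:

// Minimal usage sketch, assuming a PADDLE_WITH_MKLDNN build.
#include "paddle_inference_api.h"

int main() {
  paddle::AnalysisConfig config;
  config.SetModel("./model_dir");  // hypothetical model directory
  config.EnableMKLDNN();           // bfloat16 passes run on the MKLDNN execution path
  config.EnableMkldnnBfloat16();   // sets use_mkldnn_bfloat16_ and re-runs Update()
  // In a non-MKLDNN build the call above only logs an error and leaves the flag false.
  return config.mkldnn_bfloat16_enabled() ? 0 : 1;
}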
@@ -485,4 +485,25 @@ TEST_F(MkldnnQuantizerTest, kl_scaling_factor_unsigned) {
}
#endif

#ifdef PADDLE_WITH_CUDA
TEST(AnalysisPredictor, bf16_gpu_pass_strategy) {
  AnalysisConfig config;
  config.SetModel(FLAGS_dirname);
  config.SwitchIrOptim(true);
  config.EnableUseGpu(100, 0);
  config.EnableMkldnnBfloat16();
#ifdef PADDLE_WITH_MKLDNN
  ASSERT_EQ(config.mkldnn_bfloat16_enabled(), true);
#else
  ASSERT_EQ(config.mkldnn_bfloat16_enabled(), false);
#endif
}
#endif

TEST(AnalysisPredictor, bf16_pass_strategy) {
  std::vector<std::string> passes;
  PassStrategy passStrategy(passes);
  passStrategy.EnableMkldnnBfloat16();
}

}  // namespace paddle
@@ -401,6 +401,19 @@ struct PD_INFER_DECL AnalysisConfig {
  ///
  void EnableMkldnnQuantizer();

  ///
  /// \brief Turn on MKLDNN bfloat16.
  ///
  void EnableMkldnnBfloat16();

  ///
  /// \brief A boolean state telling whether MKLDNN bfloat16 is enabled.
  ///
  /// \return bool Whether MKLDNN bfloat16 is enabled.
  ///
  bool mkldnn_bfloat16_enabled() const { return use_mkldnn_bfloat16_; }
  ///
  /// \brief A boolean state telling whether the thread local CUDA stream is
  /// enabled.

@@ -592,6 +605,7 @@ struct PD_INFER_DECL AnalysisConfig {
  int mkldnn_cache_capacity_{0};
  bool use_mkldnn_quantizer_{false};
  std::shared_ptr<MkldnnQuantizerConfig> mkldnn_quantizer_config_;
  bool use_mkldnn_bfloat16_{false};

  // If the config is already used on a predictor, it becomes invalid.
  // Any config can only be used with one predictor.
...
@@ -143,6 +143,10 @@ void GpuPassStrategy::EnableMkldnnQuantizer() {
  LOG(ERROR) << "GPU not support MKL-DNN quantization";
}

void GpuPassStrategy::EnableMkldnnBfloat16() {
  LOG(ERROR) << "GPU not support MKL-DNN bfloat16";
}

CpuPassStrategy::CpuPassStrategy() : PassStrategy({}) {
  // NOTE the large fusions should be located in the front, so that they will
  // not be damaged by smaller ones.

@@ -223,4 +227,12 @@ void CpuPassStrategy::EnableMkldnnQuantizer() {
#endif
}

void CpuPassStrategy::EnableMkldnnBfloat16() {
#ifdef PADDLE_WITH_MKLDNN
  use_mkldnn_bfloat16_ = true;
#else
  use_mkldnn_bfloat16_ = false;
#endif
}

}  // namespace paddle
@@ -132,6 +132,9 @@ class PD_INFER_DECL PassStrategy : public PaddlePassBuilder {
  /// \brief Enable MKLDNN quantize optimization.
  virtual void EnableMkldnnQuantizer() {}

  /// \brief Enable MKLDNN bfloat16.
  virtual void EnableMkldnnBfloat16() {}

  /// \brief Check if we are using gpu.
  /// \return A bool variable implying whether we are in gpu mode.
  bool use_gpu() const { return use_gpu_; }

@@ -161,6 +164,7 @@ class PD_INFER_DECL CpuPassStrategy : public PassStrategy {
    use_gpu_ = other.use_gpu_;
    use_mkldnn_ = other.use_mkldnn_;
    use_mkldnn_quantizer_ = other.use_mkldnn_quantizer_;
    use_mkldnn_bfloat16_ = other.use_mkldnn_bfloat16_;
  }

  /// \brief Default destructor.
  virtual ~CpuPassStrategy() = default;

@@ -174,9 +178,13 @@ class PD_INFER_DECL CpuPassStrategy : public PassStrategy {
  /// \brief Enable MKLDNN quantize optimization.
  void EnableMkldnnQuantizer() override;

  /// \brief Enable MKLDNN bfloat16.
  void EnableMkldnnBfloat16() override;

 protected:
  /// \cond Protected
  bool use_mkldnn_quantizer_{false};
  bool use_mkldnn_bfloat16_{false};
  /// \endcond
};

@@ -205,6 +213,9 @@ class PD_INFER_DECL GpuPassStrategy : public PassStrategy {
  /// \brief Not supported in GPU mode yet.
  void EnableMkldnnQuantizer() override;

  /// \brief Not supported in GPU mode yet.
  void EnableMkldnnBfloat16() override;

  /// \brief Default destructor.
  virtual ~GpuPassStrategy() = default;
...
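
To make the virtual dispatch above concrete, a short hedged sketch (it mirrors the bf16_pass_strategy test earlier in this commit; Demo is an illustrative function, not part of the diff):

// Behavior sketch for the three strategy classes.
#include <string>
#include <vector>
#include "paddle_pass_builder.h"

void Demo() {
  std::vector<std::string> passes;
  paddle::PassStrategy base(passes);  // base class: EnableMkldnnBfloat16() is a no-op
  base.EnableMkldnnBfloat16();

  paddle::CpuPassStrategy cpu;        // sets use_mkldnn_bfloat16_ only under PADDLE_WITH_MKLDNN
  cpu.EnableMkldnnBfloat16();

  paddle::GpuPassStrategy gpu;        // not supported: only logs an error
  gpu.EnableMkldnnBfloat16();
}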
@@ -235,6 +235,12 @@ PADDLE_CAPI_EXPORT extern void PD_EnableMkldnnQuantizer(
PADDLE_CAPI_EXPORT extern bool PD_MkldnnQuantizerEnabled(
    const PD_AnalysisConfig* config);

PADDLE_CAPI_EXPORT extern void PD_EnableMkldnnBfloat16(
    PD_AnalysisConfig* config);

PADDLE_CAPI_EXPORT extern bool PD_MkldnnBfloat16Enabled(
    const PD_AnalysisConfig* config);

PADDLE_CAPI_EXPORT extern void PD_SetModelBuffer(PD_AnalysisConfig* config,
                                                 const char* prog_buffer,
                                                 size_t prog_buffer_size,
...
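
A hedged C-API sketch tying the two new exports together. PD_NewAnalysisConfig is assumed from the surrounding C API (only the two PD_*Bfloat16 functions are added by this commit; PD_DeleteAnalysisConfig appears in the tests below):

// C-API usage sketch; compiles as C or C++ against paddle_c_api.h.
#include <stdbool.h>
#include "paddle_c_api.h"

int main() {
  PD_AnalysisConfig* config = PD_NewAnalysisConfig();  // assumed constructor
  PD_EnableMkldnnBfloat16(config);
  // True only in a PADDLE_WITH_MKLDNN build; false (plus an error log) otherwise.
  bool bf16_on = PD_MkldnnBfloat16Enabled(config);
  PD_DeleteAnalysisConfig(config);
  return bf16_on ? 0 : 1;
}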
@@ -207,6 +207,18 @@ bool PD_MkldnnQuantizerEnabled(const PD_AnalysisConfig* config) {
  return config->config.mkldnn_quantizer_enabled();
}

void PD_EnableMkldnnBfloat16(PD_AnalysisConfig* config) {
  PADDLE_ENFORCE_NOT_NULL(config, paddle::platform::errors::NotFound(
                                      "PD_AnalysisConfig should not be null"));
  config->config.EnableMkldnnBfloat16();
}

bool PD_MkldnnBfloat16Enabled(const PD_AnalysisConfig* config) {
  PADDLE_ENFORCE_NOT_NULL(config, paddle::platform::errors::NotFound(
                                      "PD_AnalysisConfig should not be null"));
  return config->config.mkldnn_bfloat16_enabled();
}

void PD_SetModelBuffer(PD_AnalysisConfig* config, const char* prog_buffer,
                       size_t prog_buffer_size, const char* params_buffer,
                       size_t params_buffer_size) {
...
@@ -54,6 +54,9 @@ TEST(PD_AnalysisConfig, use_gpu) {
  PD_SwitchIrOptim(config, true);
  bool ir_optim = PD_IrOptim(config);
  CHECK(ir_optim) << "NO";
  PD_EnableMkldnnBfloat16(config);
  bool bfloat16_enable = PD_MkldnnBfloat16Enabled(config);
  CHECK(!bfloat16_enable) << "NO";
  PD_EnableTensorRtEngine(config, 1 << 20, 1, 3, Precision::kFloat32, false,
                          false);
  bool trt_enable = PD_TensorrtEngineEnabled(config);
...
@@ -88,6 +88,9 @@ TEST(PD_AnalysisConfig, profile_mkldnn) {
  PD_EnableMkldnnQuantizer(config);
  bool quantizer_enable = PD_MkldnnQuantizerEnabled(config);
  CHECK(quantizer_enable) << "NO";
  PD_EnableMkldnnBfloat16(config);
  bool bfloat16_enable = PD_MkldnnBfloat16Enabled(config);
  CHECK(bfloat16_enable) << "NO";
  PD_SetMkldnnCacheCapacity(config, 0);
  PD_SetModel(config, prog_file.c_str(), params_file.c_str());
  PD_DeleteAnalysisConfig(config);
...
@@ -448,6 +448,7 @@ void BindAnalysisConfig(py::module *m) {
           &AnalysisConfig::cpu_math_library_num_threads)
      .def("to_native_config", &AnalysisConfig::ToNativeConfig)
      .def("enable_quantizer", &AnalysisConfig::EnableMkldnnQuantizer)
      .def("enable_mkldnn_bfloat16", &AnalysisConfig::EnableMkldnnBfloat16)
#ifdef PADDLE_WITH_MKLDNN
      .def("quantizer_config", &AnalysisConfig::mkldnn_quantizer_config,
           py::return_value_policy::reference)

@@ -565,6 +566,7 @@ void BindPaddlePassBuilder(py::module *m) {
      .def("enable_cudnn", &PassStrategy::EnableCUDNN)
      .def("enable_mkldnn", &PassStrategy::EnableMKLDNN)
      .def("enable_mkldnn_quantizer", &PassStrategy::EnableMkldnnQuantizer)
      .def("enable_mkldnn_bfloat16", &PassStrategy::EnableMkldnnBfloat16)
      .def("use_gpu", &PassStrategy::use_gpu);

  py::class_<CpuPassStrategy, PassStrategy>(*m, "CpuPassStrategy")

@@ -572,14 +574,16 @@ void BindPaddlePassBuilder(py::module *m) {
      .def(py::init<const CpuPassStrategy &>())
      .def("enable_cudnn", &CpuPassStrategy::EnableCUDNN)
      .def("enable_mkldnn", &CpuPassStrategy::EnableMKLDNN)
      .def("enable_mkldnn_quantizer", &CpuPassStrategy::EnableMkldnnQuantizer)
      .def("enable_mkldnn_bfloat16", &CpuPassStrategy::EnableMkldnnBfloat16);

  py::class_<GpuPassStrategy, PassStrategy>(*m, "GpuPassStrategy")
      .def(py::init<>())
      .def(py::init<const GpuPassStrategy &>())
      .def("enable_cudnn", &GpuPassStrategy::EnableCUDNN)
      .def("enable_mkldnn", &GpuPassStrategy::EnableMKLDNN)
      .def("enable_mkldnn_quantizer", &GpuPassStrategy::EnableMkldnnQuantizer)
      .def("enable_mkldnn_bfloat16", &GpuPassStrategy::EnableMkldnnBfloat16);
}

}  // namespace
}  // namespace pybind
...