未验证 提交 02083bda 编写于 作者: J joanna.wozna.intel 提交者: GitHub

Add mkldnn bfloat16 option to C-API (#26676)

* Add mkldnn bfloat16 option to C-API

* Add test for bfloat16 gpu

* Change coverage test
上级 5f275aad
......@@ -154,10 +154,17 @@ func (config *AnalysisConfig) EnableMkldnnQuantizer() {
C.PD_EnableMkldnnQuantizer(config.c)
}
func (config *AnalysisConfig) EnableMkldnnBfloat16() {
C.PD_EnableMkldnnBfloat16(config.c)
}
func (config *AnalysisConfig) MkldnnQuantizerEnabled() bool {
return ConvertCBooleanToGo(C.PD_MkldnnQuantizerEnabled(config.c))
}
func (config *AnalysisConfig) MkldnnBfloat16Enabled() bool {
return ConvertCBooleanToGo(C.PD_MkldnnBfloat16Enabled(config.c))
}
// SetModelBuffer
// ModelFromMemory
......
......@@ -218,6 +218,17 @@ void AnalysisConfig::EnableMkldnnQuantizer() {
Update();
}
void AnalysisConfig::EnableMkldnnBfloat16() {
#ifdef PADDLE_WITH_MKLDNN
use_mkldnn_bfloat16_ = true;
#else
LOG(ERROR) << "Please compile with MKLDNN first to use MkldnnBfloat16";
use_mkldnn_bfloat16_ = false;
#endif
Update();
}
MkldnnQuantizerConfig *AnalysisConfig::mkldnn_quantizer_config() const {
PADDLE_ENFORCE_NOT_NULL(mkldnn_quantizer_config_,
"MkldnnQuantizer was not enabled yet.");
......@@ -331,6 +342,12 @@ void AnalysisConfig::Update() {
#endif
}
if (use_mkldnn_bfloat16_) {
#ifdef PADDLE_WITH_MKLDNN
pass_builder()->EnableMkldnnBfloat16();
#endif
}
#ifdef PADDLE_WITH_MKLDNN
// Do not optimize when mkldnn is on
if (enable_memory_optim_ && !use_mkldnn_) {
......@@ -399,6 +416,7 @@ std::string AnalysisConfig::SerializeInfoCache() {
ss << ";";
ss << use_mkldnn_quantizer_;
ss << use_mkldnn_bfloat16_;
ss << model_from_memory_;
ss << with_profile_;
......
......@@ -485,4 +485,25 @@ TEST_F(MkldnnQuantizerTest, kl_scaling_factor_unsigned) {
}
#endif
#ifdef PADDLE_WITH_CUDA
TEST(AnalysisPredictor, bf16_gpu_pass_strategy) {
AnalysisConfig config;
config.SetModel(FLAGS_dirname);
config.SwitchIrOptim(true);
config.EnableUseGpu(100, 0);
config.EnableMkldnnBfloat16();
#ifdef PADDLE_WITH_MKLDNN
ASSERT_EQ(config.mkldnn_bfloat16_enabled(), true);
#else
ASSERT_EQ(config.mkldnn_bfloat16_enabled(), false);
#endif
}
#endif
TEST(AnalysisPredictor, bf16_pass_strategy) {
std::vector<std::string> passes;
PassStrategy passStrategy(passes);
passStrategy.EnableMkldnnBfloat16();
}
} // namespace paddle
......@@ -401,6 +401,19 @@ struct PD_INFER_DECL AnalysisConfig {
///
void EnableMkldnnQuantizer();
///
/// \brief Turn on MKLDNN bfloat16.
///
///
void EnableMkldnnBfloat16();
///
/// \brief A boolean state telling whether to use the MKLDNN Bfloat16.
///
/// \return bool Whether to use the MKLDNN Bfloat16.
///
bool mkldnn_bfloat16_enabled() const { return use_mkldnn_bfloat16_; }
///
/// \brief A boolean state telling whether the thread local CUDA stream is
/// enabled.
......@@ -592,6 +605,7 @@ struct PD_INFER_DECL AnalysisConfig {
int mkldnn_cache_capacity_{0};
bool use_mkldnn_quantizer_{false};
std::shared_ptr<MkldnnQuantizerConfig> mkldnn_quantizer_config_;
bool use_mkldnn_bfloat16_{false};
// If the config is already used on a predictor, it becomes invalid.
// Any config can only be used with one predictor.
......
......@@ -143,6 +143,10 @@ void GpuPassStrategy::EnableMkldnnQuantizer() {
LOG(ERROR) << "GPU not support MKL-DNN quantization";
}
void GpuPassStrategy::EnableMkldnnBfloat16() {
LOG(ERROR) << "GPU not support MKL-DNN bfloat16";
}
CpuPassStrategy::CpuPassStrategy() : PassStrategy({}) {
// NOTE the large fusions should be located in the front, so that they will
// not be damaged by smaller ones.
......@@ -223,4 +227,12 @@ void CpuPassStrategy::EnableMkldnnQuantizer() {
#endif
}
void CpuPassStrategy::EnableMkldnnBfloat16() {
#ifdef PADDLE_WITH_MKLDNN
use_mkldnn_bfloat16_ = true;
#else
use_mkldnn_bfloat16_ = false;
#endif
}
} // namespace paddle
......@@ -132,6 +132,9 @@ class PD_INFER_DECL PassStrategy : public PaddlePassBuilder {
/// \brief Enable MKLDNN quantize optimization.
virtual void EnableMkldnnQuantizer() {}
/// \brief Enable MKLDNN bfloat16.
virtual void EnableMkldnnBfloat16() {}
/// \brief Check if we are using gpu.
/// \return A bool variable implying whether we are in gpu mode.
bool use_gpu() const { return use_gpu_; }
......@@ -161,6 +164,7 @@ class PD_INFER_DECL CpuPassStrategy : public PassStrategy {
use_gpu_ = other.use_gpu_;
use_mkldnn_ = other.use_mkldnn_;
use_mkldnn_quantizer_ = other.use_mkldnn_quantizer_;
use_mkldnn_bfloat16_ = other.use_mkldnn_bfloat16_;
}
/// \brief Default destructor.
virtual ~CpuPassStrategy() = default;
......@@ -174,9 +178,13 @@ class PD_INFER_DECL CpuPassStrategy : public PassStrategy {
/// \brief Enable MKLDNN quantize optimization.
void EnableMkldnnQuantizer() override;
/// \brief Enable MKLDNN bfloat16.
void EnableMkldnnBfloat16() override;
protected:
/// \cond Protected
bool use_mkldnn_quantizer_{false};
bool use_mkldnn_bfloat16_{false};
/// \endcond
};
......@@ -205,6 +213,9 @@ class PD_INFER_DECL GpuPassStrategy : public PassStrategy {
/// \brief Not supported in GPU mode yet.
void EnableMkldnnQuantizer() override;
/// \brief Not supported in GPU mode yet.
void EnableMkldnnBfloat16() override;
/// \brief Default destructor.
virtual ~GpuPassStrategy() = default;
......
......@@ -235,6 +235,12 @@ PADDLE_CAPI_EXPORT extern void PD_EnableMkldnnQuantizer(
PADDLE_CAPI_EXPORT extern bool PD_MkldnnQuantizerEnabled(
const PD_AnalysisConfig* config);
PADDLE_CAPI_EXPORT extern void PD_EnableMkldnnBfloat16(
PD_AnalysisConfig* config);
PADDLE_CAPI_EXPORT extern bool PD_MkldnnBfloat16Enabled(
const PD_AnalysisConfig* config);
PADDLE_CAPI_EXPORT extern void PD_SetModelBuffer(PD_AnalysisConfig* config,
const char* prog_buffer,
size_t prog_buffer_size,
......
......@@ -207,6 +207,18 @@ bool PD_MkldnnQuantizerEnabled(const PD_AnalysisConfig* config) {
return config->config.mkldnn_quantizer_enabled();
}
void PD_EnableMkldnnBfloat16(PD_AnalysisConfig* config) {
PADDLE_ENFORCE_NOT_NULL(config, paddle::platform::errors::NotFound(
"PD_AnalysisConfig should not be null"));
config->config.EnableMkldnnBfloat16();
}
bool PD_MkldnnBfloat16Enabled(const PD_AnalysisConfig* config) {
PADDLE_ENFORCE_NOT_NULL(config, paddle::platform::errors::NotFound(
"PD_AnalysisConfig should not be null"));
return config->config.mkldnn_bfloat16_enabled();
}
void PD_SetModelBuffer(PD_AnalysisConfig* config, const char* prog_buffer,
size_t prog_buffer_size, const char* params_buffer,
size_t params_buffer_size) {
......
......@@ -54,6 +54,9 @@ TEST(PD_AnalysisConfig, use_gpu) {
PD_SwitchIrOptim(config, true);
bool ir_optim = PD_IrOptim(config);
CHECK(ir_optim) << "NO";
PD_EnableMkldnnBfloat16(config);
bool bfloat16_enable = PD_MkldnnBfloat16Enabled(config);
CHECK(!bfloat16_enable) << "NO";
PD_EnableTensorRtEngine(config, 1 << 20, 1, 3, Precision::kFloat32, false,
false);
bool trt_enable = PD_TensorrtEngineEnabled(config);
......
......@@ -88,6 +88,9 @@ TEST(PD_AnalysisConfig, profile_mkldnn) {
PD_EnableMkldnnQuantizer(config);
bool quantizer_enable = PD_MkldnnQuantizerEnabled(config);
CHECK(quantizer_enable) << "NO";
PD_EnableMkldnnBfloat16(config);
bool bfloat16_enable = PD_MkldnnBfloat16Enabled(config);
CHECK(bfloat16_enable) << "NO";
PD_SetMkldnnCacheCapacity(config, 0);
PD_SetModel(config, prog_file.c_str(), params_file.c_str());
PD_DeleteAnalysisConfig(config);
......
......@@ -448,6 +448,7 @@ void BindAnalysisConfig(py::module *m) {
&AnalysisConfig::cpu_math_library_num_threads)
.def("to_native_config", &AnalysisConfig::ToNativeConfig)
.def("enable_quantizer", &AnalysisConfig::EnableMkldnnQuantizer)
.def("enable_mkldnn_bfloat16", &AnalysisConfig::EnableMkldnnBfloat16)
#ifdef PADDLE_WITH_MKLDNN
.def("quantizer_config", &AnalysisConfig::mkldnn_quantizer_config,
py::return_value_policy::reference)
......@@ -565,6 +566,7 @@ void BindPaddlePassBuilder(py::module *m) {
.def("enable_cudnn", &PassStrategy::EnableCUDNN)
.def("enable_mkldnn", &PassStrategy::EnableMKLDNN)
.def("enable_mkldnn_quantizer", &PassStrategy::EnableMkldnnQuantizer)
.def("enable_mkldnn_bfloat16", &PassStrategy::EnableMkldnnBfloat16)
.def("use_gpu", &PassStrategy::use_gpu);
py::class_<CpuPassStrategy, PassStrategy>(*m, "CpuPassStrategy")
......@@ -572,14 +574,16 @@ void BindPaddlePassBuilder(py::module *m) {
.def(py::init<const CpuPassStrategy &>())
.def("enable_cudnn", &CpuPassStrategy::EnableCUDNN)
.def("enable_mkldnn", &CpuPassStrategy::EnableMKLDNN)
.def("enable_mkldnn_quantizer", &CpuPassStrategy::EnableMkldnnQuantizer);
.def("enable_mkldnn_quantizer", &CpuPassStrategy::EnableMkldnnQuantizer)
.def("enable_mkldnn_bfloat16", &CpuPassStrategy::EnableMkldnnBfloat16);
py::class_<GpuPassStrategy, PassStrategy>(*m, "GpuPassStrategy")
.def(py::init<>())
.def(py::init<const GpuPassStrategy &>())
.def("enable_cudnn", &GpuPassStrategy::EnableCUDNN)
.def("enable_mkldnn", &GpuPassStrategy::EnableMKLDNN)
.def("enable_mkldnn_quantizer", &GpuPassStrategy::EnableMkldnnQuantizer);
.def("enable_mkldnn_quantizer", &GpuPassStrategy::EnableMkldnnQuantizer)
.def("enable_mkldnn_bfloat16", &GpuPassStrategy::EnableMkldnnBfloat16);
}
} // namespace
} // namespace pybind
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册