diff --git a/paddle/fluid/inference/anakin/engine.cc b/paddle/fluid/inference/anakin/engine.cc
index fdf2f228f59fe8889f00dc6a1d3b11540f8e779c..529a859458a9884a53778e7133ab121ed582a3fb 100644
--- a/paddle/fluid/inference/anakin/engine.cc
+++ b/paddle/fluid/inference/anakin/engine.cc
@@ -36,13 +36,14 @@ template <typename TargetT, Precision PrecisionType, OpRunType RunType>
 AnakinEngine<TargetT, PrecisionType, RunType>::AnakinEngine(
     bool need_summary, int device, int max_batch_size,
     std::map<std::string, std::vector<int>> max_input_shape,
-    std::vector<std::string> program_inputs)
+    std::vector<std::string> program_inputs, bool auto_config_layout)
     : graph_(new AnakinGraphT()),
       net_(new AnakinNetT(need_summary)) {
   device_ = device;
   max_batch_size_ = max_batch_size;
   max_input_shape_ = max_input_shape;
   program_inputs_ = program_inputs;
+  auto_config_layout_ = auto_config_layout;
 }
 
 template <typename TargetT, Precision PrecisionType, OpRunType RunType>
@@ -57,7 +58,7 @@ void AnakinEngine<TargetT, PrecisionType, RunType>::SetInputShape(
 
 template <typename TargetT, Precision PrecisionType, OpRunType RunType>
 void AnakinEngine<TargetT, PrecisionType, RunType>::InitNet() {
-  net_->init(*graph_);
+  net_->init(*graph_, auto_config_layout_);
 }
 
 template <typename TargetT, Precision PrecisionType, OpRunType RunType>
diff --git a/paddle/fluid/inference/anakin/engine.h b/paddle/fluid/inference/anakin/engine.h
index 5e76331cc56b478827d734c1668b177f46185c55..fb40f56511ba255413d422f156f4265102616d03 100644
--- a/paddle/fluid/inference/anakin/engine.h
+++ b/paddle/fluid/inference/anakin/engine.h
@@ -58,7 +58,8 @@ class AnakinEngine {
   explicit AnakinEngine(
       bool need_summary = false, int device = 0, int max_batch_size = 1,
       std::map<std::string, std::vector<int>> max_input_shape = {},
-      std::vector<std::string> program_inputs = {});
+      std::vector<std::string> program_inputs = {},
+      bool auto_config_layout = false);
   ~AnakinEngine();
   void InitNet();
   void SetInputShape(const std::string &name, std::vector<int> shape);
@@ -120,6 +121,8 @@ class AnakinEngine {
   std::unique_ptr<AnakinNetT> net_;
   std::vector<std::string> program_inputs_;
   std::unordered_map<std::string, float> tensor_scales_;
+  // Always false in GPU mode, but true in most CPU cases.
+  bool auto_config_layout_;
 };
 
 template <typename TargetT, ::anakin::Precision PrecisionType>
@@ -138,10 +141,11 @@ class AnakinEngineManager {
   AnakinEngineT *Create(bool need_summary, int device, int max_batch_size,
                         std::map<std::string, std::vector<int>> max_input_shape,
                         std::vector<std::string> program_inputs,
-                        std::string engine_name) {
+                        bool auto_config_layout, std::string engine_name) {
     std::unique_lock<std::mutex> lk(mut_);
     auto *p = new AnakinEngine<TargetT, PrecisionType>(
-        need_summary, device, max_batch_size, max_input_shape, program_inputs);
+        need_summary, device, max_batch_size, max_input_shape, program_inputs,
+        auto_config_layout);
     engines_[engine_name].reset(p);
     return p;
   }
diff --git a/paddle/fluid/inference/analysis/argument.h b/paddle/fluid/inference/analysis/argument.h
index 0e6374201f4623e35c3ed40bdd629a035df1f1d9..66e8d8b5287178fd00dba963a2f4011ce8d8e51e 100644
--- a/paddle/fluid/inference/analysis/argument.h
+++ b/paddle/fluid/inference/analysis/argument.h
@@ -171,6 +171,7 @@
   DECL_ARGUMENT_FIELD(anakin_min_subgraph_size, AnakinMinSubgraphSize, int);
   DECL_ARGUMENT_FIELD(anakin_precision_mode, AnakinPrecisionMode,
                       AnalysisConfig::Precision);
+  DECL_ARGUMENT_FIELD(anakin_auto_config_layout, AnakinAutoConfigLayout, bool);
   DECL_ARGUMENT_FIELD(use_anakin, UseAnakin, bool);
   DECL_ARGUMENT_FIELD(anakin_passes_filter, AnakinPassesFilter,
                       std::vector<std::string>);
diff --git a/paddle/fluid/inference/analysis/ir_pass_manager.cc b/paddle/fluid/inference/analysis/ir_pass_manager.cc
index 25db3346cfffd64a4fea59a376bd57fd8c76b6d2..4714c30507c4c3f8978ec10f3b19fd3f8a3b3b3d 100644
--- a/paddle/fluid/inference/analysis/ir_pass_manager.cc
+++ b/paddle/fluid/inference/analysis/ir_pass_manager.cc
@@ -128,6 +128,8 @@
       pass->Set("enable_int8", new bool(enable_int8));
       pass->Set("anakin_ops_filter",
                 new std::vector<std::string>(argument->anakin_ops_filter()));
+      pass->Set("auto_config_layout",
+                new bool(argument->anakin_auto_config_layout()));
     }
 
     pre_pass = pass_name;
diff --git a/paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.cc b/paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.cc
index 5f74121dc3a6796b1f2b7566e62e8b0a95f85617..9586ce3e6b01422db1616060946cf5b11c5a1c29 100644
--- a/paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.cc
+++ b/paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.cc
@@ -226,18 +226,20 @@ void AnakinSubgraphPass::CreateAnakinEngine(
   auto max_batch_size = Get<int>("max_batch_size");
   auto max_input_shape =
       Get<std::map<std::string, std::vector<int>>>("max_input_shape");
+  bool auto_config_layout = Get<bool>("auto_config_layout");
   if (use_gpu) {
 #ifdef PADDLE_WITH_CUDA
     inference::Singleton<
         anakin::AnakinEngineManager<::anakin::saber::NV, PrecisionT>>::Global()
         .Create(true, Get<int>("gpu_device_id"), max_batch_size,
-                max_input_shape, program_inputs, engine_key);
+                max_input_shape, program_inputs, false, engine_key);
 #endif
   } else {
     inference::Singleton<
         anakin::AnakinEngineManager<::anakin::saber::X86, PrecisionT>>::Global()
         .Create(true, Get<int>("gpu_device_id"), max_batch_size,
-                max_input_shape, program_inputs, engine_key);
+                max_input_shape, program_inputs, auto_config_layout,
+                engine_key);
   }
 
   auto *scope = param_scope();
diff --git a/paddle/fluid/inference/api/analysis_config.cc b/paddle/fluid/inference/api/analysis_config.cc
index 228d80bf9f75964fe619560224a3ae835be6326f..4fe0c48d8f31d25078146a39b5a7cc315bc64903 100644
--- a/paddle/fluid/inference/api/analysis_config.cc
+++ b/paddle/fluid/inference/api/analysis_config.cc
@@ -117,6 +117,7 @@
   CP_MEMBER(anakin_max_input_shape_);
   CP_MEMBER(anakin_min_subgraph_size_);
   CP_MEMBER(anakin_precision_mode_);
+  CP_MEMBER(anakin_auto_config_layout_);
   CP_MEMBER(anakin_passes_filter_);
   CP_MEMBER(anakin_ops_filter_);
 
@@ -398,7 +399,7 @@ void AnalysisConfig::SwitchIrDebug(int x) {
 void AnalysisConfig::EnableAnakinEngine(
     int max_batch_size, std::map<std::string, std::vector<int>> max_input_shape,
     int min_subgraph_size, AnalysisConfig::Precision precision_mode,
-    std::vector<std::string> passes_filter,
+    bool auto_config_layout, std::vector<std::string> passes_filter,
     std::vector<std::string> ops_filter) {
   anakin_max_batchsize_ = max_batch_size;
   anakin_max_input_shape_ = max_input_shape;
@@ -407,6 +408,7 @@ void AnalysisConfig::EnableAnakinEngine(
   anakin_ops_filter_ = ops_filter;
   use_anakin_ = true;
   anakin_precision_mode_ = precision_mode;
+  anakin_auto_config_layout_ = auto_config_layout;
   Update();
 }
 }  // namespace paddle
diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
index 7552a576a655b9befca1fee221193bdf512f4cbc..677f5bf130f124db90ac99155d2ec336604ec17e 100644
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -387,6 +387,7 @@
     argument_.SetAnakinMaxInputShape(config_.anakin_max_input_shape_);
     argument_.SetAnakinMinSubgraphSize(config_.anakin_min_subgraph_size_);
     argument_.SetAnakinPrecisionMode(config_.anakin_precision_mode_);
+    argument_.SetAnakinAutoConfigLayout(config_.anakin_auto_config_layout_);
     argument_.SetAnakinPassesFilter(config_.anakin_passes_filter_);
     argument_.SetAnakinOpsFilter(config_.anakin_ops_filter_);
     LOG(INFO) << "Anakin subgraph engine is enabled";
diff --git a/paddle/fluid/inference/api/paddle_analysis_config.h b/paddle/fluid/inference/api/paddle_analysis_config.h
index 0f1c42c3602cbf1a5976f00649df22c6a0adea74..ebe289322bdd32294885ce933b960773733f62f0 100644
--- a/paddle/fluid/inference/api/paddle_analysis_config.h
+++ b/paddle/fluid/inference/api/paddle_analysis_config.h
@@ -153,6 +153,7 @@
       int max_batch_size = 1,
       std::map<std::string, std::vector<int>> max_input_shape = {},
       int min_subgraph_size = 6, Precision precision = Precision::kFloat32,
+      bool auto_config_layout = false,
      std::vector<std::string> passes_filter = {},
      std::vector<std::string> ops_filter = {});
 
@@ -294,6 +295,7 @@
   int anakin_min_subgraph_size_{6};
   std::map<std::string, std::vector<int>> anakin_max_input_shape_;
   Precision anakin_precision_mode_;
+  bool anakin_auto_config_layout_{false};
   std::vector<std::string> anakin_passes_filter_;
   std::vector<std::string> anakin_ops_filter_;
   std::map<std::string, std::string> engine_opt_info_;
diff --git a/paddle/fluid/inference/api/paddle_pass_builder.cc b/paddle/fluid/inference/api/paddle_pass_builder.cc
index d531f754ccafc8aa5b47e24a9140b94bc0b5603d..fea291c5528a11fd18b1069a5d57e456c8cc84fc 100644
--- a/paddle/fluid/inference/api/paddle_pass_builder.cc
+++ b/paddle/fluid/inference/api/paddle_pass_builder.cc
@@ -78,11 +78,9 @@ const std::vector<std::string> kAnakinSubgraphPasses({
     "fillconstant_elementwisemul_fuse",  //
     "fc_fuse_pass",                      //
     "conv_elementwise_add_fuse_pass",    //
-    // "conv_bn_fuse_pass",              //
-    // "conv_elementwise_add_fuse_pass", //
-    "fc_gru_fuse_pass",        //
-    "anakin_subgraph_pass",    //
-    "fc_gru_fuse_pass",        //
+    "fc_gru_fuse_pass",                  //
+    "anakin_subgraph_pass",              //
+    "fc_gru_fuse_pass",                  //
 });
 
 GpuPassStrategy::GpuPassStrategy() : PassStrategy({}) {
diff --git a/paddle/fluid/pybind/inference_api.cc b/paddle/fluid/pybind/inference_api.cc
index 8385e6331d757bce93e2db61e532fa829e43c9ec..b650225c64a9a37e46d5b6f14eb2f03bebbaa71f 100644
--- a/paddle/fluid/pybind/inference_api.cc
+++ b/paddle/fluid/pybind/inference_api.cc
@@ -236,6 +236,7 @@
                std::map<std::string, std::vector<int>>(),
            py::arg("min_subgraph_size") = 6,
            py::arg("precision_mode") = AnalysisConfig::Precision::kFloat32,
+           py::arg("auto_config_layout") = false,
            py::arg("passes_filter") = std::vector<std::string>(),
            py::arg("ops_filter") = std::vector<std::string>())
      .def("tensorrt_engine_enabled", &AnalysisConfig::tensorrt_engine_enabled)
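
A minimal usage sketch of the new flag through the public C++ API, for reference only (not part of the patch). The model path, input-tensor name, and shape values below are hypothetical placeholders; the call itself follows the EnableAnakinEngine signature introduced above. Note that anakin_subgraph_pass.cc hard-codes false on the NV (GPU) branch, so auto_config_layout only takes effect for the X86 engine.

// Usage sketch (assumptions: model path and "input" shape are made up).
#include <map>
#include <string>
#include <vector>

#include "paddle/fluid/inference/api/paddle_analysis_config.h"

int main() {
  paddle::AnalysisConfig config;
  config.SetModel("./mobilenet_v2");  // hypothetical model directory

  // Per-input maximum shapes, as required by the Anakin engine.
  std::map<std::string, std::vector<int>> max_input_shape;
  max_input_shape["input"] = {1, 3, 224, 224};  // hypothetical max shape

  // Enable the Anakin subgraph engine with the new auto_config_layout flag.
  // On GPU the subgraph pass forces this to false, so it matters on CPU only.
  config.EnableAnakinEngine(/*max_batch_size=*/1, max_input_shape,
                            /*min_subgraph_size=*/6,
                            paddle::AnalysisConfig::Precision::kFloat32,
                            /*auto_config_layout=*/true);
  return 0;
}

Leaving auto_config_layout defaulted to false keeps the existing behavior for all current callers, which is why the parameter was appended with a default value rather than made mandatory.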