From 943ad4781f5c96573e9615668c1cc73dcb378356 Mon Sep 17 00:00:00 2001
From: bingyanghuang <33643817+bingyanghuang@users.noreply.github.com>
Date: Sat, 8 Dec 2018 10:54:33 +0800
Subject: [PATCH] One possible solution to add flexibility for mkldnn placement pass (#14768)

* Choose to turn on use_mkldnn attribute v1

* Fix mkldnn_op empty bug

* format change
test=develop

* fix ci
test=develop

* fix ci test and add test in dam
test=develop

* add example to dam compare test
test=develop

* review changes
test=develop
---
 paddle/fluid/framework/ir/mkldnn_placement_pass.cc  | 14 +++++++++++---
 paddle/fluid/inference/analysis/argument.h          |  4 ++++
 paddle/fluid/inference/analysis/ir_pass_manager.cc  |  5 +++++
 paddle/fluid/inference/api/analysis_config.cc       |  8 ++++++++
 paddle/fluid/inference/api/analysis_predictor.cc    |  4 ++++
 .../fluid/inference/api/paddle_analysis_config.h    |  5 +++++
 .../inference/tests/api/analyzer_dam_tester.cc      |  4 ++++
 7 files changed, 41 insertions(+), 3 deletions(-)

diff --git a/paddle/fluid/framework/ir/mkldnn_placement_pass.cc b/paddle/fluid/framework/ir/mkldnn_placement_pass.cc
index 1cf1315d3d3..9a9314161b0 100644
--- a/paddle/fluid/framework/ir/mkldnn_placement_pass.cc
+++ b/paddle/fluid/framework/ir/mkldnn_placement_pass.cc
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "paddle/fluid/framework/ir/mkldnn_placement_pass.h"
+#include <string>
 
 namespace paddle {
 namespace framework {
@@ -21,9 +22,16 @@ namespace ir {
 std::unique_ptr<ir::Graph> MKLDNNPlacementPass::ApplyImpl(
     std::unique_ptr<ir::Graph> graph) const {
   VLOG(3) << "Aplies MKL-DNN placement strategy.";
+  const auto& op_types_list =
+      Get<std::unordered_set<std::string>>("mkldnn_enabled_op_types");
   for (const Node* n : graph->Nodes()) {
     if (n->IsOp() && n->RuntimeHasAttr("use_mkldnn")) {
-      n->Op()->SetAttr("use_mkldnn", true);
+      if (op_types_list.empty()) {
+        n->Op()->SetAttr("use_mkldnn", true);
+      } else if (std::find(op_types_list.begin(), op_types_list.end(),
+                           n->Name()) != op_types_list.end()) {
+        n->Op()->SetAttr("use_mkldnn", true);
+      }
     }
   }
   return graph;
@@ -33,5 +41,5 @@ std::unique_ptr<ir::Graph> MKLDNNPlacementPass::ApplyImpl(
 }  // namespace framework
 }  // namespace paddle
 
-REGISTER_PASS(mkldnn_placement_pass,
-              paddle::framework::ir::MKLDNNPlacementPass);
+REGISTER_PASS(mkldnn_placement_pass, paddle::framework::ir::MKLDNNPlacementPass)
+    .RequirePassAttr("mkldnn_enabled_op_types");
diff --git a/paddle/fluid/inference/analysis/argument.h b/paddle/fluid/inference/analysis/argument.h
index 53cc7039f20..83d411eecf6 100644
--- a/paddle/fluid/inference/analysis/argument.h
+++ b/paddle/fluid/inference/analysis/argument.h
@@ -116,6 +116,10 @@ struct Argument {
   DECL_ARGUMENT_FIELD(ir_analysis_passes, IrAnalysisPasses,
                       std::vector<std::string>);
 
+  // Pass a set of op types to enable its mkldnn kernel
+  DECL_ARGUMENT_FIELD(mkldnn_enabled_op_types, MKLDNNEnabledOpTypes,
+                      std::unordered_set<std::string>);
+
   DECL_ARGUMENT_FIELD(use_gpu, UseGPU, bool);
   DECL_ARGUMENT_FIELD(gpu_device_id, GPUDeviceId, int);
   DECL_ARGUMENT_FIELD(use_tensorrt, UseTensorRT, bool);
diff --git a/paddle/fluid/inference/analysis/ir_pass_manager.cc b/paddle/fluid/inference/analysis/ir_pass_manager.cc
index fce5e1cac92..51bca8039d4 100644
--- a/paddle/fluid/inference/analysis/ir_pass_manager.cc
+++ b/paddle/fluid/inference/analysis/ir_pass_manager.cc
@@ -63,6 +63,11 @@ void IRPassManager::CreatePasses(Argument *argument,
       pass->Set("graph_viz_path", new std::string(std::move(dot_file_path)));
       pass_num++;
     }
+    if (pass_name == "mkldnn_placement_pass") {
pass->Set("mkldnn_enabled_op_types", + new std::unordered_set( + argument->mkldnn_enabled_op_types())); + } if (pass_name == "tensorrt_subgraph_pass") { PADDLE_ENFORCE(argument->tensorrt_node_teller_valid()); diff --git a/paddle/fluid/inference/api/analysis_config.cc b/paddle/fluid/inference/api/analysis_config.cc index 384d1dc27d6..dcefdd92f51 100644 --- a/paddle/fluid/inference/api/analysis_config.cc +++ b/paddle/fluid/inference/api/analysis_config.cc @@ -49,6 +49,10 @@ contrib::AnalysisConfig::AnalysisConfig(const contrib::AnalysisConfig &other) { cpu_math_library_num_threads_ = other.cpu_math_library_num_threads_; // fields from this. enable_ir_optim = other.enable_ir_optim; + // For mkldnn + use_mkldnn_ = other.use_mkldnn_; + mkldnn_enabled_op_types_ = other.mkldnn_enabled_op_types_; + use_feed_fetch_ops = other.use_feed_fetch_ops; use_tensorrt_ = other.use_tensorrt_; tensorrt_max_batchsize_ = other.tensorrt_max_batchsize_; @@ -77,6 +81,10 @@ contrib::AnalysisConfig::AnalysisConfig(contrib::AnalysisConfig &&other) { cpu_math_library_num_threads_ = other.cpu_math_library_num_threads_; // fields from this. enable_ir_optim = other.enable_ir_optim; + // For mkldnn + use_mkldnn_ = other.use_mkldnn_; + mkldnn_enabled_op_types_ = other.mkldnn_enabled_op_types_; + use_feed_fetch_ops = other.use_feed_fetch_ops; use_tensorrt_ = other.use_tensorrt_; tensorrt_max_batchsize_ = other.tensorrt_max_batchsize_; diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index 84f7eca0570..be51e7fc1f0 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -327,6 +327,10 @@ void AnalysisPredictor::OptimizeInferenceProgram() { argument_.SetTensorRtMaxBatchSize(config_.tensorrt_max_batchsize_); } + if (config_.use_mkldnn_) { + argument_.SetMKLDNNEnabledOpTypes(config_.mkldnn_enabled_op_types_); + } + auto passes = config_.pass_builder()->AllPasses(); if (!config_.enable_ir_optim) passes.clear(); argument_.SetIrAnalysisPasses(passes); diff --git a/paddle/fluid/inference/api/paddle_analysis_config.h b/paddle/fluid/inference/api/paddle_analysis_config.h index a08e3d027e0..f05b9832da5 100644 --- a/paddle/fluid/inference/api/paddle_analysis_config.h +++ b/paddle/fluid/inference/api/paddle_analysis_config.h @@ -16,6 +16,7 @@ #include #include #include +#include #include // Here we include some header files with relative paths, for that in deploy, @@ -53,6 +54,9 @@ struct AnalysisConfig : public NativeConfig { void EnableMKLDNN(); bool use_mkldnn() const { return use_mkldnn_; } + void SetMKLDNNOp(std::unordered_set op_list) { + mkldnn_enabled_op_types_ = op_list; + } // Specify the memory buffer of program and parameter void SetModelBuffer(const char* prog_buffer, size_t prog_buffer_size, @@ -64,6 +68,7 @@ struct AnalysisConfig : public NativeConfig { protected: bool use_tensorrt_{false}; bool use_mkldnn_{false}; + std::unordered_set mkldnn_enabled_op_types_; int tensorrt_workspace_size_; int tensorrt_max_batchsize_; std::unique_ptr pass_builder_; diff --git a/paddle/fluid/inference/tests/api/analyzer_dam_tester.cc b/paddle/fluid/inference/tests/api/analyzer_dam_tester.cc index e8abcfce05f..227e2ff4587 100644 --- a/paddle/fluid/inference/tests/api/analyzer_dam_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_dam_tester.cc @@ -194,6 +194,8 @@ void profile(bool use_mkldnn = false) { if (use_mkldnn) { cfg.EnableMKLDNN(); + std::unordered_set op_list = {"conv3d"}; + cfg.SetMKLDNNOp(op_list); 
   }
 
   std::vector<PaddleTensor> outputs;
@@ -236,6 +238,8 @@ void compare(bool use_mkldnn = false) {
   SetConfig(&cfg);
   if (use_mkldnn) {
     cfg.EnableMKLDNN();
+    std::unordered_set<std::string> op_list = {"conv3d"};
+    cfg.SetMKLDNNOp(op_list);
   }
 
   std::vector<std::vector<PaddleTensor>> input_slots_all;
-- 
GitLab
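
The snippet below is a minimal usage sketch rather than part of the patch: it shows how the SetMKLDNNOp() hook added to contrib::AnalysisConfig can be combined with EnableMKLDNN() so that only the listed op types receive use_mkldnn=true in mkldnn_placement_pass, while an empty set keeps the old enable-everything behaviour. The model directory and the op list are illustrative assumptions.

#include <string>
#include <unordered_set>
#include "paddle/fluid/inference/api/paddle_inference_api.h"

namespace paddle {

void RunWithSelectiveMKLDNN() {
  contrib::AnalysisConfig cfg;
  cfg.model_dir = "./mobilenet";  // hypothetical model directory
  cfg.EnableMKLDNN();
  // Restrict MKL-DNN placement to these op types; leaving the set empty
  // (the default) turns use_mkldnn on for every op that supports it.
  std::unordered_set<std::string> op_list = {"conv2d", "pool2d"};
  cfg.SetMKLDNNOp(op_list);

  // Build the predictor as usual; the placement pass reads the attribute
  // during the IR analysis phase.
  auto predictor = CreatePaddlePredictor<contrib::AnalysisConfig>(cfg);
  // ... feed inputs and call predictor->Run(...) as usual.
}

}  // namespace paddle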