From 943ad4781f5c96573e9615668c1cc73dcb378356 Mon Sep 17 00:00:00 2001
From: bingyanghuang <33643817+bingyanghuang@users.noreply.github.com>
Date: Sat, 8 Dec 2018 10:54:33 +0800
Subject: [PATCH] One possible solution to add flexibility for mkldnn placement pass (#14768)

* Choose to turn on use_mkldnn attribute v1

* Fix mkldnn_op empty bug

* format change
test=develop

* fix ci
test=develop

* fix ci test and add test in dam
test=develop

* add example to dam compare test
test=develop

* review changes
test=develop
---
 paddle/fluid/framework/ir/mkldnn_placement_pass.cc  | 14 +++++++++++---
 paddle/fluid/inference/analysis/argument.h          |  4 ++++
 paddle/fluid/inference/analysis/ir_pass_manager.cc  |  5 +++++
 paddle/fluid/inference/api/analysis_config.cc       |  8 ++++++++
 paddle/fluid/inference/api/analysis_predictor.cc    |  4 ++++
 .../fluid/inference/api/paddle_analysis_config.h    |  5 +++++
 .../inference/tests/api/analyzer_dam_tester.cc      |  4 ++++
 7 files changed, 41 insertions(+), 3 deletions(-)

diff --git a/paddle/fluid/framework/ir/mkldnn_placement_pass.cc b/paddle/fluid/framework/ir/mkldnn_placement_pass.cc
index 1cf1315d3d3..9a9314161b0 100644
--- a/paddle/fluid/framework/ir/mkldnn_placement_pass.cc
+++ b/paddle/fluid/framework/ir/mkldnn_placement_pass.cc
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "paddle/fluid/framework/ir/mkldnn_placement_pass.h"
+#include <string>
 
 namespace paddle {
 namespace framework {
@@ -21,9 +22,16 @@ namespace ir {
 std::unique_ptr<ir::Graph> MKLDNNPlacementPass::ApplyImpl(
     std::unique_ptr<ir::Graph> graph) const {
   VLOG(3) << "Aplies MKL-DNN placement strategy.";
+  const auto& op_types_list =
+      Get<std::unordered_set<std::string>>("mkldnn_enabled_op_types");
   for (const Node* n : graph->Nodes()) {
     if (n->IsOp() && n->RuntimeHasAttr("use_mkldnn")) {
-      n->Op()->SetAttr("use_mkldnn", true);
+      if (op_types_list.empty()) {
+        n->Op()->SetAttr("use_mkldnn", true);
+      } else if (std::find(op_types_list.begin(), op_types_list.end(),
+                           n->Name()) != op_types_list.end()) {
+        n->Op()->SetAttr("use_mkldnn", true);
+      }
     }
   }
   return graph;
@@ -33,5 +41,5 @@ std::unique_ptr<ir::Graph> MKLDNNPlacementPass::ApplyImpl(
 }  // namespace framework
 }  // namespace paddle
 
-REGISTER_PASS(mkldnn_placement_pass,
-              paddle::framework::ir::MKLDNNPlacementPass);
+REGISTER_PASS(mkldnn_placement_pass, paddle::framework::ir::MKLDNNPlacementPass)
+    .RequirePassAttr("mkldnn_enabled_op_types");
diff --git a/paddle/fluid/inference/analysis/argument.h b/paddle/fluid/inference/analysis/argument.h
index 53cc7039f20..83d411eecf6 100644
--- a/paddle/fluid/inference/analysis/argument.h
+++ b/paddle/fluid/inference/analysis/argument.h
@@ -116,6 +116,10 @@ struct Argument {
   DECL_ARGUMENT_FIELD(ir_analysis_passes, IrAnalysisPasses,
                       std::vector<std::string>);
 
+  // Pass a set of op types to enable its mkldnn kernel
+  DECL_ARGUMENT_FIELD(mkldnn_enabled_op_types, MKLDNNEnabledOpTypes,
+                      std::unordered_set<std::string>);
+
   DECL_ARGUMENT_FIELD(use_gpu, UseGPU, bool);
   DECL_ARGUMENT_FIELD(gpu_device_id, GPUDeviceId, int);
   DECL_ARGUMENT_FIELD(use_tensorrt, UseTensorRT, bool);
diff --git a/paddle/fluid/inference/analysis/ir_pass_manager.cc b/paddle/fluid/inference/analysis/ir_pass_manager.cc
index fce5e1cac92..51bca8039d4 100644
--- a/paddle/fluid/inference/analysis/ir_pass_manager.cc
+++ b/paddle/fluid/inference/analysis/ir_pass_manager.cc
@@ -63,6 +63,11 @@ void IRPassManager::CreatePasses(Argument *argument,
       pass->Set("graph_viz_path", new std::string(std::move(dot_file_path)));
       pass_num++;
     }
+    if (pass_name == "mkldnn_placement_pass") {
pass->Set("mkldnn_enabled_op_types", + new std::unordered_set( + argument->mkldnn_enabled_op_types())); + } if (pass_name == "tensorrt_subgraph_pass") { PADDLE_ENFORCE(argument->tensorrt_node_teller_valid()); diff --git a/paddle/fluid/inference/api/analysis_config.cc b/paddle/fluid/inference/api/analysis_config.cc index 384d1dc27d6..dcefdd92f51 100644 --- a/paddle/fluid/inference/api/analysis_config.cc +++ b/paddle/fluid/inference/api/analysis_config.cc @@ -49,6 +49,10 @@ contrib::AnalysisConfig::AnalysisConfig(const contrib::AnalysisConfig &other) { cpu_math_library_num_threads_ = other.cpu_math_library_num_threads_; // fields from this. enable_ir_optim = other.enable_ir_optim; + // For mkldnn + use_mkldnn_ = other.use_mkldnn_; + mkldnn_enabled_op_types_ = other.mkldnn_enabled_op_types_; + use_feed_fetch_ops = other.use_feed_fetch_ops; use_tensorrt_ = other.use_tensorrt_; tensorrt_max_batchsize_ = other.tensorrt_max_batchsize_; @@ -77,6 +81,10 @@ contrib::AnalysisConfig::AnalysisConfig(contrib::AnalysisConfig &&other) { cpu_math_library_num_threads_ = other.cpu_math_library_num_threads_; // fields from this. enable_ir_optim = other.enable_ir_optim; + // For mkldnn + use_mkldnn_ = other.use_mkldnn_; + mkldnn_enabled_op_types_ = other.mkldnn_enabled_op_types_; + use_feed_fetch_ops = other.use_feed_fetch_ops; use_tensorrt_ = other.use_tensorrt_; tensorrt_max_batchsize_ = other.tensorrt_max_batchsize_; diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index 84f7eca0570..be51e7fc1f0 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -327,6 +327,10 @@ void AnalysisPredictor::OptimizeInferenceProgram() { argument_.SetTensorRtMaxBatchSize(config_.tensorrt_max_batchsize_); } + if (config_.use_mkldnn_) { + argument_.SetMKLDNNEnabledOpTypes(config_.mkldnn_enabled_op_types_); + } + auto passes = config_.pass_builder()->AllPasses(); if (!config_.enable_ir_optim) passes.clear(); argument_.SetIrAnalysisPasses(passes); diff --git a/paddle/fluid/inference/api/paddle_analysis_config.h b/paddle/fluid/inference/api/paddle_analysis_config.h index a08e3d027e0..f05b9832da5 100644 --- a/paddle/fluid/inference/api/paddle_analysis_config.h +++ b/paddle/fluid/inference/api/paddle_analysis_config.h @@ -16,6 +16,7 @@ #include #include #include +#include #include // Here we include some header files with relative paths, for that in deploy, @@ -53,6 +54,9 @@ struct AnalysisConfig : public NativeConfig { void EnableMKLDNN(); bool use_mkldnn() const { return use_mkldnn_; } + void SetMKLDNNOp(std::unordered_set op_list) { + mkldnn_enabled_op_types_ = op_list; + } // Specify the memory buffer of program and parameter void SetModelBuffer(const char* prog_buffer, size_t prog_buffer_size, @@ -64,6 +68,7 @@ struct AnalysisConfig : public NativeConfig { protected: bool use_tensorrt_{false}; bool use_mkldnn_{false}; + std::unordered_set mkldnn_enabled_op_types_; int tensorrt_workspace_size_; int tensorrt_max_batchsize_; std::unique_ptr pass_builder_; diff --git a/paddle/fluid/inference/tests/api/analyzer_dam_tester.cc b/paddle/fluid/inference/tests/api/analyzer_dam_tester.cc index e8abcfce05f..227e2ff4587 100644 --- a/paddle/fluid/inference/tests/api/analyzer_dam_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_dam_tester.cc @@ -194,6 +194,8 @@ void profile(bool use_mkldnn = false) { if (use_mkldnn) { cfg.EnableMKLDNN(); + std::unordered_set op_list = {"conv3d"}; + cfg.SetMKLDNNOp(op_list); 
   }
 
   std::vector<PaddleTensor> outputs;
@@ -236,6 +238,8 @@ void compare(bool use_mkldnn = false) {
   SetConfig(&cfg);
   if (use_mkldnn) {
     cfg.EnableMKLDNN();
+    std::unordered_set<std::string> op_list = {"conv3d"};
+    cfg.SetMKLDNNOp(op_list);
   }
 
   std::vector<std::vector<PaddleTensor>> input_slots_all;
-- 
GitLab
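
The snippet below is a minimal usage sketch rather than part of the patch: it shows how the SetMKLDNNOp() hook added to contrib::AnalysisConfig can be combined with EnableMKLDNN() so that only the listed op types receive use_mkldnn=true in mkldnn_placement_pass, while an empty set keeps the old enable-everything behaviour. The model directory and the op list are illustrative assumptions.

#include <string>
#include <unordered_set>
#include "paddle/fluid/inference/api/paddle_inference_api.h"

namespace paddle {

void RunWithSelectiveMKLDNN() {
  contrib::AnalysisConfig cfg;
  cfg.model_dir = "./mobilenet";  // hypothetical model directory
  cfg.EnableMKLDNN();
  // Restrict MKL-DNN placement to these op types; leaving the set empty
  // (the default) turns use_mkldnn on for every op that supports it.
  std::unordered_set<std::string> op_list = {"conv2d", "pool2d"};
  cfg.SetMKLDNNOp(op_list);

  // Build the predictor as usual; the placement pass reads the attribute
  // during the IR analysis phase.
  auto predictor = CreatePaddlePredictor<contrib::AnalysisConfig>(cfg);
  // ... feed inputs and call predictor->Run(...) as usual.
}

}  // namespace paddle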