From 5b2e98aa176b9e8d7858105cc28af2d1e6d609bc Mon Sep 17 00:00:00 2001
From: "joanna.wozna.intel" <joanna.wozna@intel.com>
Date: Thu, 9 Jan 2020 04:16:37 +0100
Subject: [PATCH] Add multiple quantize operators fuse (#22062)

---
 .../framework/ir/graph_pattern_detector.cc    | 15 +++++
 .../framework/ir/graph_pattern_detector.h     | 10 ++++
 .../ir/mkldnn/cpu_quantize_squash_pass.cc     | 57 +++++++++++++++++++
 .../ir/mkldnn/cpu_quantize_squash_pass.h      |  5 ++
 .../mkldnn/cpu_quantize_squash_pass_tester.cc | 50 ++++++++++++++++
 5 files changed, 137 insertions(+)
diff --git a/paddle/fluid/framework/ir/graph_pattern_detector.cc b/paddle/fluid/framework/ir/graph_pattern_detector.cc
index d4eb254c32..d407016325 100644
--- a/paddle/fluid/framework/ir/graph_pattern_detector.cc
+++ b/paddle/fluid/framework/ir/graph_pattern_detector.cc
@@ -1659,6 +1659,21 @@ PDNode *patterns::DequantAny::operator()() {
   return dequant_out;
 }
 
+PDNode *patterns::MultipleQuantize::operator()() {
+  auto *prev_out = pattern->NewNode(prev_out_repr())->AsOutput();
+
+  // find nodes that are inputs to quantize operators
+  prev_out->assert_more([&](Node *node) {
+    int counter = std::count_if(
+        node->outputs.begin(), node->outputs.end(), [&](Node const *iter) {
+          return iter && iter->IsOp() && iter->Op()->Type() == "quantize";
+        });
+    return (counter > 1);
+  });
+
+  return prev_out;
+}
+
 // a -> transpose_op(1) -> transpose_out_a -> flatten_op(1) -> flatten_out_a
 // b -> transpose_op(2) -> transpose_out_b -> flatten_op(2) -> flatten_out_b
 // ...
diff --git a/paddle/fluid/framework/ir/graph_pattern_detector.h b/paddle/fluid/framework/ir/graph_pattern_detector.h
index 35e142a174..ec1db60cc4 100644
--- a/paddle/fluid/framework/ir/graph_pattern_detector.h
+++ b/paddle/fluid/framework/ir/graph_pattern_detector.h
@@ -1004,6 +1004,16 @@ struct DequantAny : public PatternBase {
   PATTERN_DECL_NODE(next_op);
 };
 
+// anyOp + more then one quantize op
+// This pattern is used for squashing multiple quantize with the same scale.
+struct MultipleQuantize : public PatternBase {
+  MultipleQuantize(PDPattern* pattern, const std::string& name_scope)
+      : PatternBase(pattern, name_scope, "multiple_quantize") {}
+  PDNode* operator()();
+
+  PATTERN_DECL_NODE(prev_out);
+};
+
 struct TransposeFlattenConcat : public PatternBase {
   TransposeFlattenConcat(PDPattern* pattern, const std::string& name_scope)
       : PatternBase(pattern, name_scope, "transpose_flatten_concat") {}
diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass.cc b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass.cc
index 2d98758985..3dfd82ebcf 100644
--- a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass.cc
+++ b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass.cc
@@ -228,6 +228,62 @@ void CPUQuantizeSquashPass::FcDequantSquash(Graph* graph) const {
                   found_fc_dequant_squash_count);
 }
 
+void CPUQuantizeSquashPass::MultipleQuantizeSquash(Graph* graph) const {
+  GraphPatternDetector gpd;
+  patterns::MultipleQuantize multiple_quantize_pattern{gpd.mutable_pattern(),
+                                                       "multiple_quantize"};
+  multiple_quantize_pattern();
+
+  int found_multiple_quantize_squash_count = 0;
+  int removed_quantize = 0;
+  auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
+                     Graph* g) {
+    VLOG(4) << "fuse multiple quantize ops";
+
+    GET_IR_NODE_FROM_SUBGRAPH(prev_out, prev_out, multiple_quantize_pattern);
+
+    auto* first_quant_op = *(std::find_if(
+        prev_out->outputs.begin(), prev_out->outputs.end(), [&](Node* node) {
+          return (node->IsOp() && node->Op()->Type() == "quantize");
+        }));
+    auto* first_quant_out = first_quant_op->outputs[0];
+    float scale = first_quant_op->Op()->GetAttrIfExists<float>("Scale");
+
+    PADDLE_ENFORCE_NE(scale, 0, platform::errors::InvalidArgument(
+                                    "Quantize scale should not be equal 0"));
+
+    for (int iter = prev_out->outputs.size() - 1; iter >= 0; iter--) {
+      auto quant_op = prev_out->outputs[iter];
+      if (quant_op->IsOp() && quant_op->Op()->Type() == "quantize" &&
+          quant_op->id() != first_quant_op->id() &&
+          quant_op->Op()->GetAttrIfExists<float>("Scale") == scale) {
+        auto quant_out = quant_op->outputs[0];
+        auto last_op = quant_out->outputs[0];
+
+        std::string last_op_input_name;
+        for (auto name : last_op->Op()->InputNames())
+          for (auto input_name : last_op->Op()->Input(name))
+            if (input_name == quant_out->Name()) last_op_input_name = name;
+
+        PADDLE_ENFORCE_NE(
+            last_op_input_name.empty(), true,
+            platform::errors::NotFound("Operator after quantize operator "
+                                       "should has quantize output as input"));
+        last_op->Op()->SetInput(
+            last_op_input_name,
+            std::vector<std::string>({first_quant_out->Name()}));
+        IR_NODE_LINK_TO(first_quant_out, last_op);
+        GraphSafeRemoveNodes(graph, {quant_op, quant_out});
+        removed_quantize++;
+      }
+    }
+    found_multiple_quantize_squash_count++;
+  };
+  gpd(graph, handler);
+  AddStatis(found_multiple_quantize_squash_count);
+  PrettyLogDetail("---    squashed %d quantize op", removed_quantize);
+}
+
 void CPUQuantizeSquashPass::ApplyImpl(ir::Graph* graph) const {
   PADDLE_ENFORCE_NOT_NULL(
       graph,
@@ -240,6 +296,7 @@ void CPUQuantizeSquashPass::ApplyImpl(ir::Graph* graph) const {
   DequantQuantSquash(graph, &nodes_keep_counter);
   ConvDequantSquash(graph);
   FcDequantSquash(graph);
+  MultipleQuantizeSquash(graph);
 }
 
 }  // namespace ir
diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass.h b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass.h
index b02e057948..af8a66c929 100644
--- a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass.h
+++ b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass.h
@@ -65,6 +65,11 @@ class CPUQuantizeSquashPass : public FusePassBase {
   */
   void FcDequantSquash(Graph* graph) const;
 
+  /*
+  *  Squash quantize if several quatize ops have the same scale
+  */
+  void MultipleQuantizeSquash(Graph* graph) const;
+
   const std::string name_scope_{"squash"};
 };
 
diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass_tester.cc
index 9f5597c6dc..93ae9c8796 100644
--- a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass_tester.cc
+++ b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass_tester.cc
@@ -230,6 +230,28 @@ ProgramDesc BuildConvDequantConvProgramDesc(bool use_mkldnn, float scale_out,
   return prog;
 }
 
+// a->concat->b
+// b->Quant1(Scale1)->c
+// b->Quant2(Scale2)->d
+// b->concat->e
+// c->fc->f
+// d->fc->g
+ProgramDesc BuildMultipleQuantizeProgramDesc(bool use_mkldnn, float first_scale,
+                                             float second_scale) {
+  ProgramDesc prog;
+  for (auto& v : variable_names) {
+    prog.MutableBlock(0)->Var(v);
+  }
+  SetOp(&prog, "concat", "Concat1", {"a"}, {"b"}, use_mkldnn);
+  SetOp(&prog, "quantize", "Quantize1", {"b"}, {"c"}, use_mkldnn, first_scale);
+  SetOp(&prog, "quantize", "Quantize2", {"b"}, {"d"}, use_mkldnn, second_scale);
+  SetOp(&prog, "concat", "Concat2", {"b"}, {"e"}, use_mkldnn);
+  SetOp(&prog, "fc", "Fc1", {"c", "w1"}, {"f"}, use_mkldnn, first_scale);
+  SetOp(&prog, "fc", "Fc2", {"d", "w2"}, {"g"}, use_mkldnn, second_scale);
+
+  return prog;
+}
+
 void InitTensorHolder(Scope* scope, const paddle::platform::Place& place,
                       const char* var_name) {
   auto x = scope->Var(var_name);
@@ -467,6 +489,34 @@ TEST(CpuQuantizeSquashPass, fc_dequant_more_than_one_op_after_dequant) {
       BuildFcDequantFcProgramDesc(use_mkldnn, scale_out, scale), "fc", false);
 }
 
+// a->Concat1->b
+// b->Concat2
+// b->Quatize1(Scale)->c
+// c->Fc1
+// c->Fc2
+TEST(CpuQuantizeSquashPass, quatize_with_same_scale) {
+  auto first_scale = 1.2345f;
+  auto second_scale = 1.2345f;
+  auto use_mkldnn = true;
+  // remove nodes: Quantize2 + d
+  auto remove_nodes = 1 + 1;
+  CountNodeTest(
+      BuildMultipleQuantizeProgramDesc(use_mkldnn, first_scale, second_scale),
+      remove_nodes);
+}
+
+// if scales are not the same, do not fuse
+TEST(CpuQuantizeSquashPass, quatize_with_different_scale) {
+  auto first_scale = 1.2345f;
+  auto second_scale = 1.5432f;
+  auto use_mkldnn = true;
+  // nothing change
+  auto remove_nodes = 0;
+  CountNodeTest(
+      BuildMultipleQuantizeProgramDesc(use_mkldnn, first_scale, second_scale),
+      remove_nodes);
+}
+
 }  // namespace ir
 }  // namespace framework
 }  // namespace paddle
-- 
GitLab