From ae80df915533fb1f83cd2c72f2eafead531a3ba3 Mon Sep 17 00:00:00 2001
From: "joanna.wozna.intel" <joanna.wozna@intel.com>
Date: Mon, 16 Aug 2021 10:01:52 +0200
Subject: [PATCH] Fix elementwise_add quantization (#34820)

* Remove force_fp32_output from elementwise_add quantization; the op is
  no longer quantized when its output scale is missing

* Fix cpu_quantize_placement_pass tests: relu is no longer in the default
  list of quantizable op types

* Review related changes
---
 .../framework/ir/graph_pattern_detector.cc    |  6 +++---
 .../framework/ir/mkldnn/cpu_quantize_pass.cc  | 19 ++++++++-----------
 .../ir/mkldnn/cpu_quantize_pass_tester.cc     |  7 +++----
 .../cpu_quantize_placement_pass_tester.cc     |  8 ++++----
 4 files changed, 18 insertions(+), 22 deletions(-)

diff --git a/paddle/fluid/framework/ir/graph_pattern_detector.cc b/paddle/fluid/framework/ir/graph_pattern_detector.cc
index 70e48755dcd..b4c94010e48 100644
--- a/paddle/fluid/framework/ir/graph_pattern_detector.cc
+++ b/paddle/fluid/framework/ir/graph_pattern_detector.cc
@@ -2249,9 +2249,9 @@ PDNode *patterns::MultipleQuantize::operator()() {
 PDNode *patterns::QuantizePlacement::operator()(
     const std::unordered_set<std::string> &quantize_enabled_op_types) {
   std::unordered_set<std::string> supported_op_types =
-      std::unordered_set<std::string>(
-          {"concat", "conv2d", "elementwise_add", "fc", "matmul", "pool2d",
-           "prior_box", "relu", "reshape2", "transpose2", "fusion_gru"});
+      std::unordered_set<std::string>({"concat", "conv2d", "elementwise_add",
+                                       "fc", "matmul", "pool2d", "prior_box",
+                                       "reshape2", "transpose2", "fusion_gru"});
   if (!quantize_enabled_op_types.empty()) {
     supported_op_types = quantize_enabled_op_types;
   }
diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc
index 3c06c9ee41d..f50cd0a01d2 100644
--- a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc
+++ b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc
@@ -770,7 +770,8 @@ void CPUQuantizePass::QuantizeElementwiseAdd(Graph* graph) const {
     GET_IR_NODE_FROM_SUBGRAPH(elementwise_add_out, elementwise_add_out,
                               elementwise_add_pattern);
 
-    if (!AreScalesPresentForNodes({elementwise_add_x, elementwise_add_y})) {
+    if (!AreScalesPresentForNodes(
+            {elementwise_add_x, elementwise_add_y, elementwise_add_out})) {
       LogCannotQuantizeOp(elementwise_add_op);
       return;
     }
@@ -793,16 +794,12 @@ void CPUQuantizePass::QuantizeElementwiseAdd(Graph* graph) const {
     QuantizeInput(g, elementwise_add_op, elementwise_add_y, "Y", input_y_scale,
                   is_y_unsigned, "Scale_y");
 
-    // if quantization scale is missing for output tensor, return fp32 data
-    if (AreScalesPresentForNodes({elementwise_add_out})) {
-      bool is_output_unsigned{false};
-      auto output_scale =
-          GetScaleValueForNode(elementwise_add_out, &is_output_unsigned);
-      DequantizeOutput(g, elementwise_add_op, elementwise_add_out, "Out",
-                       output_scale, is_output_unsigned, "Scale_out");
-    } else {
-      elementwise_add_op->Op()->SetAttr("force_fp32_output", true);
-    }
+    bool is_output_unsigned{false};
+    auto output_scale =
+        GetScaleValueForNode(elementwise_add_out, &is_output_unsigned);
+
+    DequantizeOutput(g, elementwise_add_op, elementwise_add_out, "Out",
+                     output_scale, is_output_unsigned, "Scale_out");
 
     ++quantize_elementwise_add_count;
   };
diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc
index adb431fdb09..6fcea6a66cc 100644
--- a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc
+++ b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc
@@ -854,13 +854,12 @@ TEST(CpuQuantizePass, elementwise_add) {
 
 TEST(CpuQuantizePass, elementwise_add_output_scale_missing) {
   int elementwise_add_count = 1;
-  int quant_count = 2;
+  int quant_count = 0;
   int dequant_count = 2;
-  // 2 Quant + 2 IN
-  int added_nodes_count = 4;
+  int added_nodes_count = 0;
   MainTestElementwiseAdd(BuildProgramDescElementwiseAdd(),
                          elementwise_add_count, quant_count, dequant_count,
-                         added_nodes_count, 2.0f * 127, true);
+                         added_nodes_count, 1.f, true);
 }
 
 TEST(CpuQuantizePass, elementwise_add_unsigned_and_signed_input) {
diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_placement_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_placement_pass_tester.cc
index 761defc25ff..daf913bf7d8 100644
--- a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_placement_pass_tester.cc
+++ b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_placement_pass_tester.cc
@@ -131,13 +131,13 @@ TEST(QuantizerPlacementPass, enabled_conv_excluded_one) {
 }
 
 TEST(QuantizerPlacementPass, empty_list) {
-  // all operators quantized
-  MainTest({}, {}, 6);
+  // all operators except relu should be quantized
+  MainTest({}, {}, 5);
 }
 
 TEST(QuantizerPlacementPass, default_attr_value) {
-  //  all operators quantized
-  DefaultAttrTest(6);
+  // all operators except relu should be quantized
+  DefaultAttrTest(5);
 }
 
 }  // namespace ir
-- 
GitLab