From 9de67725103899aba775ada15d9bdb928ce2fb97 Mon Sep 17 00:00:00 2001
From: bingyanghuang <33643817+bingyanghuang@users.noreply.github.com>
Date: Sat, 28 Sep 2019 20:05:13 +0800
Subject: [PATCH] Follow comments on merged QAT PR 18970 (#19979)

* Follow Wangzhen's comment in PR 18970, test=develop

* Review comments, test=develop

* Leave fake quantization around mul
  test=develop

* Replace fake quantization around mul with a real quantized mul
  test=develop

* Fix bug in the quantize placement pass: nodes in the graph are now
  matched by op type instead of node name when they are to be marked
  for quantization
  test=develop
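Note (illustrative, not part of this patch): the new
_transform_to_quantize_mkldnn/_transform_to_mul_mkldnn pair keeps a real
oneDNN quantize op in front of each mul op and forwards its scale to the
mul instead of stripping the fake quantization. A minimal Python sketch
of the scale arithmetic, assuming self._s8_max is 127 and using a
made-up InScale value:

    # Hypothetical numbers, for illustration only.
    s8_max = 127.0                 # value assumed for self._s8_max
    in_scale = 0.5                 # first element of the InScale tensor
    scale_in = s8_max / in_scale   # becomes the 'Scale' attr of quantize
    assert scale_in == 254.0
    # The consuming mul op is then tagged with:
    #   scale_x = scale_in, scale_y = [1.0], scale_out = 1.0,
    #   force_fp32_output = True   # mul runs in int8, emits fp32 output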
---
 .../ir/mkldnn/cpu_quantize_placement_pass.cc  |  2 +-
 .../quantization/quantization_mkldnn_pass.py  | 59 ++++++++++++++-----
 2 files changed, 46 insertions(+), 15 deletions(-)

diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_placement_pass.cc b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_placement_pass.cc
index 79a8ac68b82..2ccd4062214 100644
--- a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_placement_pass.cc
+++ b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_placement_pass.cc
@@ -36,7 +36,7 @@ void CPUQuantizePlacementPass::ApplyImpl(ir::Graph* graph) const {
     if (op_types_list.empty()) {
       op->SetAttr("use_quantizer", true);
     } else if (std::find(op_types_list.begin(), op_types_list.end(),
-                         n->Name()) != op_types_list.end()) {
+                         op->Type()) != op_types_list.end()) {
       op->SetAttr("use_quantizer", true);
     }
   }
diff --git a/python/paddle/fluid/contrib/slim/quantization/quantization_mkldnn_pass.py b/python/paddle/fluid/contrib/slim/quantization/quantization_mkldnn_pass.py
index 9ea03a49dad..eda62b4674d 100644
--- a/python/paddle/fluid/contrib/slim/quantization/quantization_mkldnn_pass.py
+++ b/python/paddle/fluid/contrib/slim/quantization/quantization_mkldnn_pass.py
@@ -375,11 +375,15 @@ class FakeQAT2MkldnnINT8PerfPass(object):
             if op.name() in self._fake_quantize_types:
                 op_out = graph._find_node_by_name(op.outputs,
                                                   op.output("Out")[0])
-                self._remove_fake_quantize(graph, op)
+                next_op = op_out.outputs[0]
+                if next_op.name() not in self._mul_ops:
+                    self._remove_fake_quantize(graph, op)
+                else:
+                    quant_op = self._transform_to_quantize_mkldnn(graph, op)
+                    self._transform_to_mul_mkldnn(graph, next_op, quant_op)
 
         for op in graph.all_op_nodes():
             if op.name() in self._fake_dequantize_types:
-                op_in = graph._find_node_by_name(op.inputs, op.input("X")[0])
                 self._remove_fake_dequantize(graph, op)
 
         return graph
@@ -426,8 +430,6 @@ class FakeQAT2MkldnnINT8PerfPass(object):
         for op in graph.all_op_nodes():
             if op.name() in self._conv_ops:
                 self._dequantize_conv_weights(graph, op)
-            elif op.name() in self._mul_ops:
-                self._dequantize_mul_weights(graph, op)
         return graph
 
     def _dequantize_conv_weights(self, graph, op_node):
@@ -463,22 +465,20 @@ class FakeQAT2MkldnnINT8PerfPass(object):
         graph = self._apply_pass(graph, 'conv_elementwise_add_mkldnn_fuse_pass')
         graph = self._apply_pass(graph, 'conv_relu_mkldnn_fuse_pass')
         graph = self._apply_pass(graph, 'conv_relu6_mkldnn_fuse_pass')
-        graph = self._apply_pass(graph, 'fc_fuse_pass')
         return graph
 
     def _apply_pass(self, graph, pass_name, attrs=None, attr_values=None):
         ir_pass = core.get_pass(pass_name)
-        inference_program = graph.to_program()
-        ir_graph = core.Graph(inference_program.desc)
-        ir_graph.set_not_owned('__param_scope__', self._scope)
+        cpp_graph = graph.graph
+        if not cpp_graph.has('__param_scope__'):
+            cpp_graph.set_not_owned('__param_scope__', self._scope)
         if attrs:
             assert attr_values and len(attrs) == len(
                 attr_values
             ), "Different number of pass attributes and their values."
             for attr, value in zip(attrs, attr_values):
                 ir_pass.set(attr, value)
-        ir_pass.apply(ir_graph)
-        graph = IrGraph(ir_graph, for_test=True)
+        ir_pass.apply(cpp_graph)
         if self._debug:
             graph.draw('.', 'qat_fp32_{}'.format(pass_name),
                        graph.all_op_nodes())
@@ -532,15 +532,46 @@ class FakeQAT2MkldnnINT8PerfPass(object):
             ids.append(op.id())
         return set(ids)
 
+    def _transform_to_quantize_mkldnn(self, graph, op_node):
+        """
+        Transform fake_quantize_xx op to quantize mkldnn op in the graph.
+        """
+        input_var_node = graph._find_node_by_name(op_node.inputs,
+                                                  op_node.input("X")[0])
+        output_var_node = graph._find_node_by_name(op_node.outputs,
+                                                   op_node.output("Out")[0])
+        scale_in = self._s8_max / self._load_param(
+            self._scope, op_node.input("InScale")[0])[0]
+        quant_op_node = graph.create_op_node(
+            op_type='quantize',
+            attrs={
+                'data_format': 'MKLDNNLAYOUT',
+                'use_mkldnn': 1,
+                'Scale': scale_in,
+                'is_negative_input': 1
+            },
+            inputs={'Input': input_var_node},
+            outputs={'Output': output_var_node})
+        graph.link_to(input_var_node, quant_op_node)
+        graph.link_to(quant_op_node, output_var_node)
+        graph.safe_remove_nodes(op_node)
+        return quant_op_node
+
+    def _transform_to_mul_mkldnn(self, graph, op_node, quantize_node):
+        input_name = op_node.input("X")[0]
+        scale_in = quantize_node.op().attr("Scale")
+        op_node.set_attr("scale_y", [1.0])
+        op_node.set_attr("scale_x", scale_in)
+        op_node.set_attr("scale_out", 1.0)
+        op_node.set_attr("force_fp32_output", True)
+
     def _quantize_fp32_graph(self, graph):
         ir_pass = self._core.get_pass('cpu_quantize_placement_pass')
-        inference_program = graph.to_program()
-        ir_graph = self._core.Graph(inference_program.desc)
+        cpp_graph = graph.graph
         ir_pass.set('quantize_enabled_op_types', {'conv2d', 'pool2d'})
         ir_pass.set('quantize_excluded_op_ids',
                     self._find_avg_pooling_ids(graph))
-        ir_pass.apply(ir_graph)
-        graph = IrGraph(ir_graph, for_test=True)
+        ir_pass.apply(cpp_graph)
         if self._debug:
             graph.draw('.', 'qat_int8_{}'.format(ir_pass.type()),
                        graph.all_op_nodes())
--
GitLab
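Usage sketch (illustrative, not part of the patch): after this change
the pass mutates the underlying C++ graph (graph.graph) in place rather
than rebuilding an IrGraph for every internal pass. A hedged example of
driving it end to end; the import path and the underscore-prefixed
constructor keyword names are assumed from the surrounding file and may
differ between Paddle versions:

    import paddle.fluid as fluid
    from paddle.fluid import core
    from paddle.fluid.framework import IrGraph
    # Import path assumed; the class lives in quantization_mkldnn_pass.py.
    from paddle.fluid.contrib.slim.quantization import \
        FakeQAT2MkldnnINT8PerfPass

    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    # Load a QAT-trained inference model ('qat_model' is a placeholder path).
    [program, feed_names, fetch_targets] = fluid.io.load_inference_model(
        'qat_model', exe)
    graph = IrGraph(core.Graph(program.desc), for_test=True)
    mkldnn_pass = FakeQAT2MkldnnINT8PerfPass(
        _scope=fluid.global_scope(), _place=place, _core=core, _debug=False)
    # Fake quantize/dequantize ops are removed or replaced with real
    # oneDNN quantize ops during apply().
    graph = mkldnn_pass.apply(graph)
    int8_program = graph.to_program()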