From a44085bed80d72296118f3e97516bef340baf3aa Mon Sep 17 00:00:00 2001
From: Bai Yifan
Date: Sun, 12 Jul 2020 15:15:42 +0800
Subject: [PATCH] support user defined quantization func and preprocess (#24720) (#25458)

* add user defined func test=develop
* update test=develop
* update test=develop
* fix name conflicts test=develop
* add unittest test=develop
* change 2018 to 2020 test=develop
* add comment test=develop
* add comment for function test=develop
* fix details test=develop
* fix details test=develop

Co-authored-by: Liufang Sang
---
 .../slim/quantization/quantization_pass.py | 316 ++++++++++++++++--
 .../tests/test_user_defined_quantization.py | 271 +++++++++++++++
 python/paddle/fluid/framework.py | 8 +-
 3 files changed, 565 insertions(+), 30 deletions(-)
 create mode 100644 python/paddle/fluid/contrib/slim/tests/test_user_defined_quantization.py

diff --git a/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py b/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py
index f04869156f..d5bba8e210 100644
--- a/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py
+++ b/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py
@@ -21,6 +21,11 @@ from ....framework import IrNode
 from ....framework import Operator
 from .... import unique_name
+from ....framework import Program, program_guard, default_startup_program
+from ....data import data
+from ....layers import mean
+from ....executor import scope_guard
+
 __all__ = [
 'QuantizationTransformPass', 'QuantizationFreezePass', 'ConvertToInt8Pass',
 'TransformForMobilePass', 'OutScaleForTrainingPass',
@@ -163,7 +168,13 @@ class QuantizationTransformPass(object):
 window_size=10000,
 moving_rate=0.9,
 skip_pattern=['skip_quant'],
- quantizable_op_type=['conv2d', 'depthwise_conv2d', 'mul']):
+ quantizable_op_type=['conv2d', 'depthwise_conv2d', 'mul'],
+ weight_quantize_func=None,
+ act_quantize_func=None,
+ weight_preprocess_func=None,
+ act_preprocess_func=None,
+ optimizer_func=None,
+ executor=None):
 """
 Constructor.
@@ -194,6 +205,33 @@ class QuantizationTransformPass(object):
 quantizable_op_type(list[str]): List the type of ops that will be quantized.
 Default is ["conv2d", "depthwise_conv2d", "mul"]. The quantizable_op_type in
 QuantizationFreezePass and ConvertToInt8Pass must be the same as this.
+ weight_quantize_func(function): Function that defines how to quantize the weight. It provides
+ a quick way to test whether a user-defined quantization method works. The function must
+ implement both quantization and dequantization: its input is the non-quantized weight and it
+ returns the dequantized weight. If None, the quantization op defined by
+ 'weight_quantize_type' is used.
+ Default is None.
+ act_quantize_func(function): Function that defines how to quantize the activation. It provides
+ a quick way to test whether a user-defined quantization method works. The function must
+ implement both quantization and dequantization: its input is the non-quantized activation and
+ it returns the dequantized activation. If None, the quantization op defined by
+ 'activation_quantize_type' is used.
+ Default is None.
+ weight_preprocess_func(function): Function that defines how to preprocess the weight before
+ quantization. It provides a quick way to test whether a user-defined preprocess method works.
+ The function's input is the non-quantized weight and it returns the processed weight to be
+ quantized. If None, the weight is quantized directly.
+ Default is None.
+ act_preprocess_func(function): Function that defines how to preprocess the activation before
+ quantization. It provides a quick way to test whether a user-defined preprocess method works.
+ The function's input is the non-quantized activation and it returns the processed activation
+ to be quantized. If None, the activation is quantized directly.
+ Default is None.
+ optimizer_func(function): Function that returns an optimizer. When 'is_test' is False and the
+ user wants to use a self-defined quantization or preprocess function, this function must be set. Default is None.
+ executor(Fluid.Executor): When the user wants to use a self-defined quantization or preprocess
+ function, an executor must be set to run the initialization. Default is None.
+
 Examples:
 .. code-block:: python
@@ -215,7 +253,12 @@ class QuantizationTransformPass(object):
 self._weight_bits = weight_bits
 self._activation_bits = activation_bits
 self._skip_pattern = skip_pattern
-
+ self._weight_quantize_func = weight_quantize_func
+ self._act_quantize_func = act_quantize_func
+ self._weight_preprocess_func = weight_preprocess_func
+ self._act_preprocess_func = act_preprocess_func
+ self._optimizer = optimizer_func
+ self._exe = executor
 quant_type = [
 'abs_max', 'channel_wise_abs_max', 'range_abs_max',
 'moving_average_abs_max'
@@ -249,6 +292,183 @@ class QuantizationTransformPass(object):
 self._is_test = None
 self._global_step = None
+ self.create_var_map = {}
+ self.create_op_map = {}
+
+ def _create_new_node(self, graph, in_node):
+ """
+ Create a node in graph that is identical to in_node.
+ Args:
+ graph(IrGraph): the graph in which to create the node.
+ in_node(IrVarNode): the node to replicate.
+ Returns:
+ the newly created node
+ """
+ key = ''
+ for inp in in_node.inputs:
+ key = key + inp.name()
+ key = key + in_node.name()
+ for inp in in_node.outputs:
+ key = key + inp.name()
+
+ if key in self.create_var_map.keys():
+ new_node = self.create_var_map[key]
+ elif in_node.is_ctrl_var():
+ new_node = graph.create_control_dep_var()
+ self.create_var_map[key] = new_node
+ else:
+ new_node = graph.create_var_node_from_desc(in_node.node.var())
+ self.create_var_map[key] = new_node
+ return new_node
+
+ def _copy_graph(self, graph, source_graph, op_node):
+ """
+ Copy op_node from source_graph to graph, recursing into the next ops
+ that consume op_node's outputs.
+ Args:
+ graph(IrGraph): target graph to copy to.
+ source_graph(IrGraph): source graph to copy from.
+ op_node(IrOpNode): op node in source_graph.
+ Returns:
+ None
+
+ """
+ key = ''
+ for inp in op_node.inputs:
+ key = key + inp.name()
+ key = key + op_node.name()
+ for inp in op_node.outputs:
+ key = key + inp.name()
+ has_created = False
+ if key in self.create_op_map.keys():
+ new_op_node = self.create_op_map[key]
+ has_created = True
+ else:
+ new_op_node = graph.create_op_node_from_desc(op_node.node.op())
+ self.create_op_map[key] = new_op_node
+ if has_created:
+ return
+ for in_node in op_node.inputs:
+ new_node = self._create_new_node(graph, in_node)
+ graph.link_to(new_node, new_op_node)
+ for in_node in op_node.outputs:
+ new_node = self._create_new_node(graph, in_node)
+ graph.link_to(new_op_node, new_node)
+ for var_node in op_node.outputs:
+ for next_op_node in var_node.outputs:
+ self._copy_graph(graph, source_graph, next_op_node)
+ return
+
+ def _insert_func(self, graph, func, var_node, op):
+ """
+ Insert the temporary program returned by func between var_node and op.
+
+ Args:
+ graph(IrGraph): target graph in which the temporary program is inserted.
+ func(Function): function that builds the temporary program.
+ var_node(IrVarNode): node in the target graph.
+ op(IrOpNode): op in the target graph.
+ Returns:
+ the op's new input node, which replaces var_node
+ """
+ tmp_program = Program()
+ startup_program = Program()
+ with program_guard(tmp_program, startup_program):
+ with unique_name.guard(var_node.name() + "_"):
+ in_node = data(
+ var_node.name() + '_tmp_input',
+ shape=var_node.shape(),
+ dtype='float32')
+ out_node = func(in_node)
+ # the loss shape must be [1] when calling minimize
+ loss = mean(out_node)
+ if not graph._for_test:
+ assert self._optimizer, "optimizer_func must be set when graph is not for test"
+ in_node.stop_gradient = False
+ optimizer = self._optimizer()
+ optimizer.minimize(loss)
+ with scope_guard(self._scope):
+ self._exe.run(startup_program)
+
+ tmp_graph = IrGraph(
+ core.Graph(tmp_program.desc), for_test=graph._for_test)
+ in_node = tmp_graph._find_node_by_name(tmp_graph.all_var_nodes(),
+ in_node.name)
+ out_node = tmp_graph._find_node_by_name(tmp_graph.all_var_nodes(),
+ out_node.name)
+
+ in_node_params = []
+ in_op_node = []
+ # copy tmp_graph into graph; afterwards the copied subgraph can be wired in between var_node and op.
+ for node in tmp_graph.all_var_nodes():
+ if node.inputs == [] and node.persistable():
+ in_node_params.append(node)
+ for node in tmp_graph.all_op_nodes():
+ if node.inputs == []:
+ in_op_node.append(node)
+ for node in in_node.outputs:
+ self._copy_graph(graph, tmp_graph, node)
+ for node in in_node_params:
+ for op_node in node.outputs:
+ self._copy_graph(graph, tmp_graph, op_node)
+ for node in in_op_node:
+ self._copy_graph(graph, tmp_graph, node)
+
+ target_in_node = graph._find_node_by_name(graph.all_var_nodes(),
+ in_node.name())
+ target_out_node = graph._find_node_by_name(graph.all_var_nodes(),
+ out_node.name())
+ loss_node = graph._find_node_by_name(graph.all_var_nodes(), loss.name)
+ outputs = target_in_node.outputs
+ for node in outputs:
+ graph.update_input_link(target_in_node, var_node, node)
+ graph.update_input_link(var_node, target_out_node, op)
+
+ # update grad
+ if not graph._for_test:
+ op_out = op.outputs[0]
+ op_out_grad = graph._find_node_by_name(graph.all_var_nodes(),
+ op_out.name() + "@GRAD")
+ # find op's gradient op, such as conv2d_grad
+ op_grad = op_out_grad.outputs[0]
+ target_out_grad_node = graph._find_node_by_name(
+ graph.all_var_nodes(), target_out_node.name() + "@GRAD")
+ in_node_grad = graph._find_node_by_name(
+ graph.all_var_nodes(), target_in_node.name() + "@GRAD")
+ in_node_grad_op = in_node_grad.inputs
+ # update op_grad's input
+ graph.update_input_link(var_node, target_out_node, op_grad)
+
+ op_grad_out = None
+ # find var_node's corresponding grad node
+ for node in op_grad.outputs:
+ if var_node.name() + "@GRAD" in node.name():
+ op_grad_out = node
+ # update op_grad's output
+ if op_grad_out is not None:
+ graph.update_output_link(op_grad_out, target_out_grad_node,
+ op_grad)
+ else:
+ graph.link_to(op_grad, target_out_grad_node)
+
+ for node in in_node_grad_op:
+ graph.update_input_link(target_in_node, var_node, node)
+ if op_grad_out:
+ graph.update_output_link(in_node_grad, op_grad_out, node)
+ # remove useless nodes
+ mean_grad = target_out_grad_node.inputs[0]
+ mean_out_grad = mean_grad.inputs[0]
+ fill_constant_node = mean_out_grad.inputs[0]
+ graph.safe_remove_nodes(mean_grad)
+ graph.safe_remove_nodes(mean_out_grad)
+ graph.safe_remove_nodes(fill_constant_node)
+ graph.safe_remove_nodes(in_node_grad)
+
+ graph.safe_remove_nodes(loss_node.inputs[0])
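+ # loss_node (the mean output) and the temporary input node are likewise
+ # scaffolding from tmp_program; the caller only needs target_out_node,
+ # which now feeds op.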
+ graph.safe_remove_nodes(loss_node)
+ graph.safe_remove_nodes(target_in_node)
+ return target_out_node
+
 def apply(self, graph):
 """
 Quantize the graph for training process. According to weight and
@@ -266,6 +486,7 @@ class QuantizationTransformPass(object):
 # marked the variable which has been dequantized.
 dequantized_vars = collections.OrderedDict()
 persistable_vars = [p.name() for p in graph.all_persistable_nodes()]
+ processed_vars = []
 def _quant_preprocess(op_node):
 user_skipped = False
@@ -281,37 +502,75 @@ class QuantizationTransformPass(object):
 def _transform_forward(graph, op):
 op.op()._set_attr("quantization_type", "qat_with_weight")
- for var_node in op.inputs:
+ inputs = op.inputs
+ for var_node in inputs:
 if var_node.name() not in op.input_arg_names():
 continue
 if var_node.name() in dequantized_vars:
 dequant_var_node = dequantized_vars[var_node.name()]
 else:
+
+ name = var_node.name()
+ if name in processed_vars:
+ continue
+
+ if var_node.name() in persistable_vars:
+ is_weight = True
+ else:
+ is_weight = False
+
+ # if the var node is a weight and weight_preprocess_func is not None,
+ # insert the weight preprocess func
+ # to preprocess the weight before quantization
+ # if the var node is an activation and act_preprocess_func is not None,
+ # insert the activation preprocess func
+ # to preprocess the activation before quantization
+ if is_weight and self._weight_preprocess_func is not None:
+ var_node = self._insert_func(
+ graph, self._weight_preprocess_func, var_node, op)
+ elif not is_weight and self._act_preprocess_func is not None:
+ var_node = self._insert_func(
+ graph, self._act_preprocess_func, var_node, op)
+
+ # if the var node is a weight and weight_quantize_func is not None,
+ # insert the weight quantize func to quantize and dequantize the weight
+ # if the var node is an activation and act_quantize_func is not None,
+ # insert the act quantize func to quantize and dequantize the activation
+ if is_weight and self._weight_quantize_func is not None:
+ target_out_node = self._insert_func(
+ graph, self._weight_quantize_func, var_node, op)
+ processed_vars.append(name)
+ continue
+ elif not is_weight and self._act_quantize_func is not None:
+ target_out_node = self._insert_func(
+ graph, self._act_quantize_func, var_node, op)
+ processed_vars.append(name)
+ continue
+
 quant_bits = self._weight_bits if var_node.name() in persistable_vars \
 else self._activation_bits
- quant_type = self._weight_quantize_type if var_node.name() \
- in persistable_vars else self._activation_quantize_type
+ quant_type = self._weight_quantize_type if is_weight \
+ else self._activation_quantize_type
 if quant_type == 'channel_wise_abs_max':
- assert var_node.name(
- ) in persistable_vars, "'channel_wise_abs_max' can only be applied on weights."
+ assert is_weight, "'channel_wise_abs_max' can only be applied on weights."
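+ # conv2d/depthwise_conv2d weights get one scale per output channel;
+ # other quantizable ops fall back to plain 'abs_max' for the weight.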
if op.name() in self._conv_ops: quant_var_node, scale_var_node = self._insert_channel_quant_op( - graph, var_node, quant_bits) + graph, var_node, name, quant_bits) dequant_var_node = self._insert_channel_dequant_op( graph, quant_var_node, [scale_var_node], [quant_bits]) else: quant_var_node, scale_var_node = self._insert_quant_op( - graph, var_node, quant_bits, 'abs_max') + graph, var_node, name, quant_bits, 'abs_max') dequant_var_node = self._insert_dequant_op( graph, quant_var_node, scale_var_node, quant_bits) else: quant_var_node, scale_var_node = self._insert_quant_op( - graph, var_node, quant_bits, quant_type) + graph, var_node, name, quant_bits, quant_type) dequant_var_node = self._insert_dequant_op( graph, quant_var_node, scale_var_node, quant_bits) - dequantized_vars[var_node.name()] = dequant_var_node + dequantized_vars[name] = dequant_var_node graph.update_input_link(var_node, dequant_var_node, op) def _transform_backward(graph, op): @@ -379,32 +638,33 @@ class QuantizationTransformPass(object): graph.link_to(increment_op, global_step_out) self._global_step = global_step_out - def _insert_quant_op(self, graph, var_node, quant_bits, quant_type): + def _insert_quant_op(self, graph, var_node, name, quant_bits, quant_type): """ Insert fake_quantize_op in the graph. """ if quant_type == 'abs_max': - return self._insert_quant_abs_max_op(graph, var_node, quant_bits) + return self._insert_quant_abs_max_op(graph, var_node, name, + quant_bits) elif quant_type == 'range_abs_max': - return self._insert_quant_range_abs_max_op(graph, var_node, + return self._insert_quant_range_abs_max_op(graph, var_node, name, quant_bits) elif quant_type == 'moving_average_abs_max': - return self._insert_quant_moving_average_abs_max_op(graph, var_node, - quant_bits) + return self._insert_quant_moving_average_abs_max_op( + graph, var_node, name, quant_bits) - def _insert_quant_abs_max_op(self, graph, var_node, quant_bits): + def _insert_quant_abs_max_op(self, graph, var_node, name, quant_bits): """ Insert fake_quantize_abs_max op in the graph. """ assert var_node.is_var(), '{} is not a var'.format(var_node.name()) quant_var_node = graph.create_var_node( - name=self._quantized_var_name(var_node.name()), + name=self._quantized_var_name(name), var_type=var_node.type(), shape=var_node.shape(), var_dtype=var_node.dtype()) scale_var_node = graph.create_var_node( - name=self._quantized_scale_name(var_node.name()), + name=self._quantized_scale_name(name), var_type=var_node.type(), shape=[1], var_dtype=var_node.dtype()) @@ -422,20 +682,20 @@ class QuantizationTransformPass(object): graph.link_to(quant_op_node, scale_var_node) return quant_var_node, scale_var_node - def _insert_quant_range_abs_max_op(self, graph, var_node, quant_bits): + def _insert_quant_range_abs_max_op(self, graph, var_node, name, quant_bits): """ Insert fake_quantize_range_abs_max on the graph. 
""" assert var_node.is_var(), '{} is not a var'.format(var_node.name()) quant_var_node = graph.create_var_node( - name=self._quantized_var_name(var_node.name()), + name=self._quantized_var_name(name), var_type=var_node.type(), shape=var_node.shape(), var_dtype=var_node.dtype()) scale_in_node = graph.create_persistable_node( - name=self._quantized_scale_name(var_node.name()), + name=self._quantized_scale_name(name), var_type=core.VarDesc.VarType.LOD_TENSOR, shape=[1], var_dtype=var_node.dtype()) @@ -493,17 +753,17 @@ class QuantizationTransformPass(object): return quant_var_node, scale_out_node - def _insert_quant_moving_average_abs_max_op(self, graph, var_node, + def _insert_quant_moving_average_abs_max_op(self, graph, var_node, name, quant_bits): """Insert fake_quantize_moving_average_abs_max """ quant_var_node = graph.create_var_node( - name=self._quantized_var_name(var_node.name()), + name=self._quantized_var_name(name), var_type=var_node.type(), shape=var_node.shape(), var_dtype=var_node.dtype()) scale_in_node = graph.create_persistable_node( - name=self._quantized_scale_name(var_node.name()), + name=self._quantized_scale_name(name), var_type=core.VarDesc.VarType.LOD_TENSOR, shape=[1], var_dtype=var_node.dtype()) @@ -580,19 +840,19 @@ class QuantizationTransformPass(object): return quant_var_node, scale_out_node - def _insert_channel_quant_op(self, graph, var_node, quant_bits): + def _insert_channel_quant_op(self, graph, var_node, name, quant_bits): """ Insert fake_channel_wise_quantize_abs_max op in the graph. """ assert var_node.is_var(), '{} is not a var'.format(var_node.name()) quant_var_node = graph.create_var_node( - name=self._quantized_var_name(var_node.name()), + name=self._quantized_var_name(name), var_type=var_node.type(), shape=var_node.shape(), var_dtype=var_node.dtype()) scale_var_node = graph.create_var_node( - name=self._quantized_scale_name(var_node.name()), + name=self._quantized_scale_name(name), var_type=var_node.type(), shape=[var_node.shape()[0]], var_dtype=var_node.dtype()) diff --git a/python/paddle/fluid/contrib/slim/tests/test_user_defined_quantization.py b/python/paddle/fluid/contrib/slim/tests/test_user_defined_quantization.py new file mode 100644 index 0000000000..6f8d84a20a --- /dev/null +++ b/python/paddle/fluid/contrib/slim/tests/test_user_defined_quantization.py @@ -0,0 +1,271 @@ +# copyright (c) 2020 paddlepaddle authors. all rights reserved. +# +# licensed under the apache license, version 2.0 (the "license"); +# you may not use this file except in compliance with the license. +# you may obtain a copy of the license at +# +# http://www.apache.org/licenses/license-2.0 +# +# unless required by applicable law or agreed to in writing, software +# distributed under the license is distributed on an "as is" basis, +# without warranties or conditions of any kind, either express or implied. +# see the license for the specific language governing permissions and +# limitations under the license. 
+ +import os +import unittest +import random +import numpy as np +import six +import paddle.fluid as fluid +import paddle +from paddle.fluid.framework import IrGraph +from paddle.fluid.contrib.slim.quantization import QuantizationTransformPass +from paddle.fluid.contrib.slim.quantization import QuantizationFreezePass +from paddle.fluid.contrib.slim.quantization import OutScaleForTrainingPass +from paddle.fluid.contrib.slim.quantization import OutScaleForInferencePass +from paddle.fluid.contrib.slim.quantization import AddQuantDequantPass +from paddle.fluid import core +from paddle.fluid.layer_helper import LayerHelper + +os.environ["CUDA_VISIBLE_DEVICES"] = "0" +os.environ["CPU_NUM"] = "1" + + +def residual_block(img, label, num=1): + def conv_bn_layer(input, + ch_out, + filter_size, + stride, + padding, + act='relu', + bias_attr=False): + tmp = fluid.layers.conv2d( + input=input, + filter_size=filter_size, + num_filters=ch_out, + stride=stride, + padding=padding, + act=None, + bias_attr=bias_attr) + return fluid.layers.batch_norm(input=tmp, act=act) + + hidden = img + for _ in six.moves.xrange(num): + conv = conv_bn_layer(hidden, 20, 3, 1, 1, act=None, bias_attr=True) + short = conv_bn_layer(hidden, 20, 1, 1, 0, act=None) + hidden = fluid.layers.elementwise_add(x=conv, y=short, act='relu') + fc = fluid.layers.fc(input=hidden, size=10, act='softmax') + loss = fluid.layers.cross_entropy(input=fc, label=label) + loss = fluid.layers.mean(loss) + return loss + + +def pact(x, name=None): + helper = LayerHelper("pact", **locals()) + dtype = 'float32' + init_thres = 20 + u_param_attr = fluid.ParamAttr( + name=x.name + '_pact', + initializer=fluid.initializer.ConstantInitializer(value=init_thres), + regularizer=fluid.regularizer.L2Decay(0.0001), + learning_rate=1) + u_param = helper.create_parameter(attr=u_param_attr, shape=[1], dtype=dtype) + x = fluid.layers.elementwise_sub( + x, fluid.layers.relu(fluid.layers.elementwise_sub(x, u_param))) + x = fluid.layers.elementwise_add( + x, fluid.layers.relu(fluid.layers.elementwise_sub(-u_param, x))) + + return x + + +class TestUserDefinedQuantization(unittest.TestCase): + def quantization_scale(self, + use_cuda, + seed, + activation_quant_type, + weight_quant_type='abs_max', + for_ci=False, + act_preprocess_func=None, + weight_preprocess_func=None, + act_quantize_func=None, + weight_quantize_func=None): + def build_program(main, startup, is_test): + main.random_seed = seed + startup.random_seed = seed + with fluid.unique_name.guard(): + with fluid.program_guard(main, startup): + img = fluid.layers.data( + name='image', shape=[1, 28, 28], dtype='float32') + img.stop_gradient = False + label = fluid.layers.data( + name='label', shape=[1], dtype='int64') + loss = residual_block(img, label, 1) + if not is_test: + opt = fluid.optimizer.SGD(learning_rate=0.0001) + opt.minimize(loss) + return [img, label], loss + + def get_optimizer(): + return fluid.optimizer.MomentumOptimizer(0.0001, 0.9) + + random.seed(0) + np.random.seed(0) + + main = fluid.Program() + startup = fluid.Program() + test_program = fluid.Program() + feeds, loss = build_program(main, startup, False) + build_program(test_program, startup, True) + test_program = test_program.clone(for_test=True) + main_graph = IrGraph(core.Graph(main.desc), for_test=False) + test_graph = IrGraph(core.Graph(test_program.desc), for_test=True) + + place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() + exe = fluid.Executor(place) + scope = fluid.Scope() + with fluid.scope_guard(scope): + exe.run(startup) + 
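+ # the same user-defined hooks go to both the train pass and the test pass;
+ # optimizer_func and executor are required because pact introduces a
+ # trainable parameter (u_param) that must be initialized and, on the
+ # train graph, updated.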
train_transform_pass = QuantizationTransformPass( + scope=scope, + place=place, + activation_quantize_type=activation_quant_type, + weight_quantize_type=weight_quant_type, + act_preprocess_func=act_preprocess_func, + weight_preprocess_func=weight_preprocess_func, + act_quantize_func=act_quantize_func, + weight_quantize_func=weight_quantize_func, + optimizer_func=get_optimizer, + executor=exe) + train_transform_pass.apply(main_graph) + test_transform_pass = QuantizationTransformPass( + scope=scope, + place=place, + activation_quantize_type=activation_quant_type, + weight_quantize_type=weight_quant_type, + act_preprocess_func=act_preprocess_func, + weight_preprocess_func=weight_preprocess_func, + act_quantize_func=act_quantize_func, + weight_quantize_func=weight_quantize_func, + optimizer_func=get_optimizer, + executor=exe) + + test_transform_pass.apply(test_graph) + + add_quant_dequant_pass = AddQuantDequantPass(scope=scope, place=place) + add_quant_dequant_pass.apply(main_graph) + add_quant_dequant_pass.apply(test_graph) + + scale_training_pass = OutScaleForTrainingPass(scope=scope, place=place) + scale_training_pass.apply(main_graph) + + dev_name = '_gpu' if use_cuda else '_cpu' + + build_strategy = fluid.BuildStrategy() + build_strategy.memory_optimize = False + build_strategy.enable_inplace = False + build_strategy.fuse_all_reduce_ops = False + binary = fluid.CompiledProgram(main_graph.graph).with_data_parallel( + loss_name=loss.name, build_strategy=build_strategy) + iters = 5 + batch_size = 8 + + train_reader = paddle.batch( + paddle.reader.shuffle( + paddle.dataset.mnist.train(), buf_size=500), + batch_size=batch_size) + feeder = fluid.DataFeeder(feed_list=feeds, place=place) + with fluid.scope_guard(scope): + for _ in range(iters): + data = next(train_reader()) + loss_v = exe.run(binary, + feed=feeder.feed(data), + fetch_list=[loss]) + + def test_act_preprocess_cuda(self): + if fluid.core.is_compiled_with_cuda(): + with fluid.unique_name.guard(): + self.quantization_scale( + True, + seed=1, + activation_quant_type='moving_average_abs_max', + weight_quant_type='channel_wise_abs_max', + for_ci=True, + act_preprocess_func=pact) + + def test_act_preprocess_cpu(self): + with fluid.unique_name.guard(): + self.quantization_scale( + False, + seed=2, + activation_quant_type='moving_average_abs_max', + weight_quant_type='channel_wise_abs_max', + for_ci=True, + act_preprocess_func=pact) + + def test_weight_preprocess_cuda(self): + if fluid.core.is_compiled_with_cuda(): + with fluid.unique_name.guard(): + self.quantization_scale( + True, + seed=1, + activation_quant_type='moving_average_abs_max', + weight_quant_type='channel_wise_abs_max', + for_ci=True, + weight_preprocess_func=pact) + + def test_weight_preprocess_cpu(self): + with fluid.unique_name.guard(): + self.quantization_scale( + False, + seed=2, + activation_quant_type='moving_average_abs_max', + weight_quant_type='channel_wise_abs_max', + for_ci=True, + weight_preprocess_func=pact) + + def test_act_quantize_cuda(self): + if fluid.core.is_compiled_with_cuda(): + with fluid.unique_name.guard(): + self.quantization_scale( + True, + seed=1, + activation_quant_type='moving_average_abs_max', + weight_quant_type='channel_wise_abs_max', + for_ci=True, + act_quantize_func=pact) + + def test_act_quantize_cpu(self): + with fluid.unique_name.guard(): + self.quantization_scale( + False, + seed=2, + activation_quant_type='moving_average_abs_max', + weight_quant_type='channel_wise_abs_max', + for_ci=True, + act_quantize_func=pact) + + def 
test_weight_quantize_cuda(self):
+ if fluid.core.is_compiled_with_cuda():
+ with fluid.unique_name.guard():
+ self.quantization_scale(
+ True,
+ seed=1,
+ activation_quant_type='moving_average_abs_max',
+ weight_quant_type='channel_wise_abs_max',
+ for_ci=True,
+ weight_quantize_func=pact)
+
+ def test_weight_quantize_cpu(self):
+ with fluid.unique_name.guard():
+ self.quantization_scale(
+ False,
+ seed=2,
+ activation_quant_type='moving_average_abs_max',
+ weight_quant_type='channel_wise_abs_max',
+ for_ci=True,
+ weight_quantize_func=pact)
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py
index d88ba16b76..06adbd038b 100644
--- a/python/paddle/fluid/framework.py
+++ b/python/paddle/fluid/framework.py
@@ -3152,8 +3152,6 @@ class IrOpNode(IrNode):
 """
 assert self.node.op() is not None, \
 "The node operator description can not be None."
- print("op: {}, old: {}, new: {}\n".format(self.node.op().type(
- ), old_output_name, new_output_name))
 self.node.op()._rename_output(old_output_name, new_output_name)
 def input(self, name):
@@ -3377,6 +3375,12 @@ class IrGraph(object):
 var_desc.set_dtype(var_dtype)
 return IrVarNode(self.graph.create_var_node(var_desc))
+ def create_control_dep_var(self):
+ """
+ Create a control dependency variable node in the graph.
+ """
+ return IrVarNode(self.graph.create_control_dep_var())
+
 def create_var_node_from_desc(self, var_desc):
 """
 Create a variable node by using an existing VarDesc in the graph.
--
GitLab
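For reference, a minimal end-to-end sketch of the API this patch adds, distilled from the constructor docstring and the unit test above. The toy network and the clip_act helper are illustrative assumptions rather than part of the patch (clip_act is a fixed-range, non-learnable simplification of the pact function); the pass arguments are the ones introduced here:

.. code-block:: python

    import paddle.fluid as fluid
    from paddle.fluid import core
    from paddle.fluid.framework import IrGraph
    from paddle.fluid.contrib.slim.quantization import QuantizationTransformPass

    # build a small program to transform
    main, startup = fluid.Program(), fluid.Program()
    with fluid.program_guard(main, startup):
        img = fluid.layers.data(name='image', shape=[1, 28, 28], dtype='float32')
        img.stop_gradient = False  # the inserted backward pass needs this gradient
        label = fluid.layers.data(name='label', shape=[1], dtype='int64')
        fc = fluid.layers.fc(input=img, size=10, act='softmax')
        loss = fluid.layers.mean(
            fluid.layers.cross_entropy(input=fc, label=label))
        fluid.optimizer.SGD(learning_rate=0.001).minimize(loss)

    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    scope = fluid.Scope()
    with fluid.scope_guard(scope):
        exe.run(startup)

    def clip_act(x, name=None):
        # user-defined preprocess: clip the activation to a fixed range
        # before the built-in fake-quant op sees it
        return fluid.layers.clip(x, min=-8.0, max=8.0)

    graph = IrGraph(core.Graph(main.desc), for_test=False)
    transform_pass = QuantizationTransformPass(
        scope=scope,
        place=place,
        activation_quantize_type='moving_average_abs_max',
        weight_quantize_type='channel_wise_abs_max',
        act_preprocess_func=clip_act,
        # required whenever the graph is a training graph
        optimizer_func=lambda: fluid.optimizer.SGD(learning_rate=0.001),
        executor=exe)
    transform_pass.apply(graph)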