From 6bbb6e7f458bc9dae8c8af91a147f1faee7bde19 Mon Sep 17 00:00:00 2001 From: guofei <52460041+gfwm2013@users.noreply.github.com> Date: Wed, 14 Oct 2020 21:43:15 +0800 Subject: [PATCH] Implement the function of OutScaleForTraining/OutScaleForInference in dygraph (#26601) * Implement the function of OutScaleForTraining/OutScaleForInference in dygraph test=develop --- .../ir/shuffle_channel_detect_pass.cc | 2 + paddle/fluid/pybind/op_function_generator.cc | 3 + .../slim/quantization/imperative/qat.py | 229 ++++++++- .../slim/quantization/imperative/quant_nn.py | 78 ++- .../slim/tests/test_imperative_out_scale.py | 461 ++++++++++++++++++ .../test_moving_average_abs_max_scale_op.py | 83 ++++ 6 files changed, 850 insertions(+), 6 deletions(-) create mode 100644 python/paddle/fluid/contrib/slim/tests/test_imperative_out_scale.py create mode 100644 python/paddle/fluid/contrib/slim/tests/test_moving_average_abs_max_scale_op.py diff --git a/paddle/fluid/framework/ir/shuffle_channel_detect_pass.cc b/paddle/fluid/framework/ir/shuffle_channel_detect_pass.cc index 92e995579fa..b9bd660043b 100644 --- a/paddle/fluid/framework/ir/shuffle_channel_detect_pass.cc +++ b/paddle/fluid/framework/ir/shuffle_channel_detect_pass.cc @@ -12,6 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include + #include "paddle/fluid/framework/ir/shuffle_channel_detect_pass.h" #include "paddle/fluid/framework/op_version_registry.h" diff --git a/paddle/fluid/pybind/op_function_generator.cc b/paddle/fluid/pybind/op_function_generator.cc index ee6e541c9e6..8288f1852c2 100644 --- a/paddle/fluid/pybind/op_function_generator.cc +++ b/paddle/fluid/pybind/op_function_generator.cc @@ -51,6 +51,7 @@ std::map> op_ins_map = { {"warpctc", {"Logits", "Label", "LogitsLength", "LabelLength"}}, {"hierarchical_sigmoid", {"X", "W", "Label", "PathTable", "PathCode", "Bias"}}, + {"moving_average_abs_max_scale", {"X", "InAccum", "InState"}}, }; // NOTE(zhiqiu): Like op_ins_map.
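This `op_ins_map` entry, together with the `op_outs_map` and `op_passing_outs_map` entries added just below, is what exposes the C++ `moving_average_abs_max_scale` op to the imperative API: the generator emits a `core.ops.moving_average_abs_max_scale` binding that takes X, InAccum and InState as inputs and expects the caller to pass in the OutScale, OutAccum and OutState buffers that the op updates in place. A minimal sketch of calling that binding directly, mirroring the call made by `MovingAverageAbsMaxScale.forward` later in this patch (the input shape, the 0.9 moving rate and the buffer initialization are illustrative assumptions, not part of this hunk):

    import numpy as np
    import paddle.fluid as fluid
    from paddle.fluid import core

    with fluid.dygraph.guard():
        x = fluid.dygraph.to_variable(np.random.rand(4, 16).astype('float32'))
        # 1-element buffers initialized to 1, as MovingAverageAbsMaxScale creates them.
        scale = fluid.dygraph.to_variable(np.ones([1], dtype='float32'))
        state = fluid.dygraph.to_variable(np.ones([1], dtype='float32'))
        accum = fluid.dygraph.to_variable(np.ones([1], dtype='float32'))
        # The leading arguments follow op_ins_map (X, InAccum, InState); the trailing
        # variables are the in-place outputs listed in op_passing_outs_map.
        out_scale, _, _ = core.ops.moving_average_abs_max_scale(
            x, accum, state, scale, state, accum,
            'moving_rate', 0.9, 'is_test', False)
        print(float(out_scale.numpy()))  # moving-average abs-max scale after one update
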
@@ -75,6 +76,7 @@ std::map> op_outs_map = { {"collect_fpn_proposals", {"FpnRois", "RoisNum"}}, {"distribute_fpn_proposals", {"MultiFpnRois", "RestoreIndex", "MultiLevelRoIsNum"}}, + {"moving_average_abs_max_scale", {"OutScale", "OutAccum", "OutState"}}, }; // NOTE(zhiqiu): Commonly, the outputs in auto-generated OP function are @@ -118,6 +120,7 @@ std::map> op_passing_outs_map = { {"check_finite_and_unscale", {"Out", "FoundInfinite"}}, {"update_loss_scaling", {"Out", "LossScaling", "OutGoodSteps", "OutBadSteps"}}, + {"moving_average_abs_max_scale", {"OutScale", "OutAccum", "OutState"}}, }; // clang-format off diff --git a/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py b/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py index 8d7ebcf4caa..7fc177e7ad7 100644 --- a/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py +++ b/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py @@ -15,18 +15,37 @@ import logging import numpy as np import sys +import os import paddle -from paddle.fluid import dygraph -from paddle.fluid.dygraph.nn import Conv2D -from paddle.fluid.dygraph.nn import Linear +from paddle.fluid import dygraph, core, framework +from paddle.fluid.executor import Executor +from paddle.fluid.dygraph.io import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX +from paddle.fluid.dygraph.nn import Conv2D, Linear, BatchNorm, Pool2D, Conv2DTranspose +from paddle.fluid.io import load_inference_model, save_inference_model +from paddle.nn.layer.activation import ReLU, LeakyReLU, Sigmoid, ReLU6, Tanh, Softmax, PReLU from paddle.fluid.log_helper import get_logger from . import quant_nn -__all__ = ['ImperativeQuantAware'] +__all__ = ['ImperativeQuantAware', 'ImperativeCalcOutScale'] _logger = get_logger( __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s') +_op_real_in_out_name = { + "conv2d": [["Input", "Filter"], ["Output"]], + "conv2d_transpose": [["Input", "Filter"], ["Output"]], + "pool2d": [["X"], ["Out"]], + "elementwise_add": [["X", "Y"], ["Out"]], + "softmax": [["X"], ["Out"]], + "relu": [["X"], ["Out"]], + "relu6": [["X"], ["Out"]], + "leaky_relu": [["X"], ["Out"]], + "prelu": [["X"], ["Out"]], + "tanh": [["X"], ["Out"]], + "batch_norm": [["X"], ["Y"]], + "sigmoid": [["X"], ["Out"]], +} + class ImperativeQuantAware(object): """ @@ -141,7 +160,6 @@ class ImperativeQuantAware(object): for name, layer in model.named_sublayers(): if not isinstance(layer, self._quantizable_layer_type): continue - scopes = name.split('.') target = scopes[-1] obj = model @@ -173,3 +191,204 @@ class ImperativeQuantAware(object): layer, self._weight_bits, self._activation_bits, self._moving_rate, self._weight_quantize_type, self._activation_quantize_type) return quantized_layer + + +class ImperativeCalcOutScale(object): + def __init__(self, + moving_rate=0.9, + target_layer_types=[ + 'BatchNorm', 'Conv2D', 'Conv2DTranspose', 'LeakyReLU', + 'Linear', 'PReLU', 'Pool2D', 'ReLU', 'ReLU6', 'Sigmoid', + 'Softmax', 'Tanh' + ]): + """ + Add the logic of calculating and setting output quantization scales of some layers. + These output quantization scales may be used by tensorRT or some other inference engines. + + Args: + moving_rate(float): The decay coefficient of moving average. The default value is 0.9. + quantizable_op_type(list[str]): List the type of layers that will be calculated out_scale. 
+ Default is ['Conv2D', 'ReLU', 'PReLU', 'LeakyReLU', 'Linear', 'Sigmoid', 'BatchNorm', 'ReLU6', 'Tanh', 'Softmax', 'Conv2DTranspose'] + """ + super(ImperativeCalcOutScale, self).__init__() + self._moving_rate = moving_rate + self._out_scale_layers_map = { + 'BatchNorm': BatchNorm, + 'Conv2D': Conv2D, + 'Conv2DTranspose': Conv2DTranspose, + 'LeakyReLU': LeakyReLU, + 'Linear': Linear, + 'PReLU': PReLU, + 'Pool2D': Pool2D, + 'ReLU': ReLU, + 'ReLU6': ReLU6, + 'Sigmoid': Sigmoid, + 'Softmax': Softmax, + 'Tanh': Tanh + } + self._out_scale_layer_type = tuple( + self._out_scale_layers_map[layer] + if layer in self._out_scale_layers_map else layer + for layer in target_layer_types) + for layer in self._out_scale_layer_type: + assert not isinstance( + layer, str), "{} is unspported to be out_scaled.".format(layer) + self._register_hook_handle_list = [] + self._out_scale_dict = {} + + def calc_out_scale(self, model): + """ + Insert the `moving_average_abs_max_scale` op to calculate output scale of Specific layers in model. + + Args: + model(fluid.dygraph.Layer): The target model which would be calculate the output quantization scale. + + Returns: + None + """ + assert isinstance( + model, dygraph.Layer), "model must be the instance of dygraph.Layer" + for _, layer in model.named_sublayers(): + if not isinstance(layer, self._out_scale_layer_type): + continue + forward_post_hook_handle = layer.register_forward_post_hook( + self._forward_post_hook) + self._register_hook_handle_list.append(forward_post_hook_handle) + + # Get the output var name of the op + def _get_op_output_names(self, op): + assert isinstance( + op, framework.Operator), "The input op should be Operator." + var_names = [] + name_list = _op_real_in_out_name[op.type][1] + for name in name_list: + var_name = op.output(name) + if isinstance(var_name, list): + var_names.extend(var_name) + else: + var_names.append(var_name) + return var_names + + def save_quantized_model(self, layer, path, input_spec=None, **config): + """ + Save the quantized model for the inference. + + Args: + layer (Layer): The Layer to be saved. + path (str): The path prefix to save model. The format is ``dirname/file_prefix`` or ``file_prefix``. + input_spec (list[InputSpec|Tensor], optional): Describes the input of the saved model's forward + method, which can be described by InputSpec or example Tensor. If None, all input variables of + the original Layer's forward method would be the inputs of the saved model. Default None. + **configs (dict, optional): Other save configuration options for compatibility. We do not + recommend using these configurations, they may be removed in the future. If not necessary, + DO NOT use them. Default None. + The following options are currently supported: + (1) output_spec (list[Tensor]): Selects the output targets of the saved model. + By default, all return variables of original Layer's forward method are kept as the + output of the saved model. If the provided ``output_spec`` list is not all output variables, + the saved model will be pruned according to the given ``output_spec`` list. 
+ + Returns: + None + """ + + assert isinstance( + layer, dygraph.Layer), "model must be the instance of dygraph.Layer" + with dygraph.guard(): + layer.eval() + for handle in self._register_hook_handle_list: + handle.remove() + for key in self._out_scale_dict: + self._out_scale_dict[key] = float(self._out_scale_dict[key] + .numpy()) + + paddle.jit.save(layer=layer, path=path, input_spec=input_spec, **config) + + if core.is_compiled_with_cuda(): + place = core.CUDAPlace(0) + else: + place = core.CPUPlace() + exe = Executor(place) + + file_prefix = os.path.basename(path) + dirname = os.path.dirname(path) + model_filename = file_prefix + INFER_MODEL_SUFFIX + params_filename = file_prefix + INFER_PARAMS_SUFFIX + + [inference_program, feed_target_names, fetch_targets] = ( + load_inference_model( + dirname=dirname, + executor=exe, + model_filename=model_filename, + params_filename=params_filename)) + + # Traverse all ops in the program and find out the op matching + # the Layer in the dynamic graph. + layer_var_dict = {} + for block in inference_program.blocks: + for op in block.ops: + if op.type in _op_real_in_out_name: + output_var_names = self._get_op_output_names(op) + for output_var_name in output_var_names: + output_var_tensor = block.var(output_var_name) + if output_var_tensor.dtype not in [ + core.VarDesc.VarType.FP64, + core.VarDesc.VarType.FP32 + ]: + continue + # Because the Layer in dygraph may correspond to multiple ops + # in static program after being saved. To ensure correctness, + # the outscale collected for output of dygraph Layer can only + # be set to the last op in the corresponding ops in static program. + # + # We can judge the execution order of the ops which corresponding + # to dygraph Layer by the name of output. And use dict to save + # the corresponding relationship between the dygraph Layer and the + # static graph op that needs to set the outscale attribute. + dynamic_layer_name, var_name_suffix = output_var_name.split( + ".") + if dynamic_layer_name in layer_var_dict: + if layer_var_dict[dynamic_layer_name][ + 0] < var_name_suffix: + layer_var_dict[dynamic_layer_name] = [ + var_name_suffix, op + ] + else: + layer_var_dict[ + dynamic_layer_name] = [var_name_suffix, op] + + # Because the naming styles of static and dynamic graph are different, + # in order to avoid mistakes, we unify the name here. + for (layer_name, var_name_op_list) in layer_var_dict.items(): + if 'prelu' in layer_name: + layer_name = layer_name.replace('prelu', 'p_re_lu') + if 'relu' in layer_name: + layer_name = layer_name.replace('relu', 're_lu') + if layer_name not in self._out_scale_dict: + continue + var_name_op_list[1]._set_attr('out_threshold', + self._out_scale_dict[layer_name]) + + # Save the processed program. + save_inference_model( + dirname=dirname, + feeded_var_names=feed_target_names, + target_vars=fetch_targets, + executor=exe, + main_program=inference_program.clone(), + model_filename=model_filename, + params_filename=params_filename) + + def _forward_post_hook(self, layer, input, output): + assert isinstance( + output, core.VarBase + ), "Multiple outputs are not currently supported in ImperativeOutScale." 
+ if output.dtype not in [ + core.VarDesc.VarType.FP32, core.VarDesc.VarType.FP64 + ]: + return + if not hasattr(layer, "_out_scale"): + layer._out_scale = quant_nn.MovingAverageAbsMaxScale( + output.name, self._moving_rate, output.dtype) + scale_out = layer._out_scale(output) + self._out_scale_dict[layer.full_name()] = scale_out diff --git a/python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py b/python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py index 2e35ac288c7..bbaae56439e 100644 --- a/python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py +++ b/python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py @@ -24,7 +24,8 @@ from paddle.fluid.data_feeder import check_variable_and_dtype __all__ = [ 'FakeQuantMovingAverage', 'FakeQuantAbsMax', 'QuantizedConv2D', - 'QuantizedLinear', 'FakeChannelWiseQuantDequantAbsMax' + 'QuantizedLinear', 'FakeChannelWiseQuantDequantAbsMax', + 'MovingAverageAbsMaxScale' ] @@ -494,3 +495,78 @@ class QuantizedLinear(layers.Layer): else: pre_activation = mul_out return self._helper.append_activation(pre_activation, act=self._act) + + +class MovingAverageAbsMaxScale(layers.Layer): + def __init__(self, name=None, moving_rate=0.9, dtype='float32'): + """ + MovingAverageMaxScale layer is used to calculating the output quantization scale of Layer. + Its computational formula is described as below: + + :math:`scale = (moving\_rate*accum+max(abs(x)))/(moving\_rate*state+1)` + :math:`Out = X` + """ + super(MovingAverageAbsMaxScale, self).__init__() + self._moving_rate = moving_rate + self._dtype = dtype + + scale_prefix = '{}.scale'.format(name) if name else 'outscale.scale' + name = unique_name.generate(scale_prefix) + scale_attr = ParamAttr( + name=name, initializer=Constant(1), trainable=False) + self._scale = self.create_parameter( + shape=[1], attr=scale_attr, dtype=self._dtype) + self._scale.stop_gradient = True + + state_prefix = "{}.state".format(name) if name else 'outscale.state' + state_attr = ParamAttr( + name=unique_name.generate(state_prefix), + initializer=Constant(1), + trainable=False) + self._state = self.create_parameter( + shape=[1], attr=state_attr, dtype=self._dtype) + self._state.stop_gradient = True + + accum_prefix = "{}.accum".format(name) if name else 'outscale.accum' + accum_attr = ParamAttr( + name=unique_name.generate(accum_prefix), + initializer=Constant(1), + trainable=False) + self._accum = self.create_parameter( + shape=[1], attr=accum_attr, dtype=self._dtype) + self._accum.stop_gradient = True + MovingAverageAbsMaxScale._has_create = True + + def forward(self, input): + if in_dygraph_mode(): + attrs = ('moving_rate', self._moving_rate, 'is_test', + not self.training) + state = self._state if self.training else None + accum = self._accum if self.training else None + + out_scale, _, _ = core.ops.moving_average_abs_max_scale( + input, accum, state, self._scale, state, accum, *attrs) + return out_scale + + check_variable_and_dtype(input, 'input', ['float32', 'float64'], + 'MovingAverageAbsMaxScale') + + scale_out = self._scale + attrs = {'moving_rate': self._moving_rate, 'is_test': not self.training} + + inputs = {"X": [input]} + outputs = {"OutScale": [scale_out]} + + if self.training: + inputs['InState'] = [self._state] + inputs['InAccum'] = [self._accum] + outputs['OutState'] = [self._state] + outputs['OutAccum'] = [self._accum] + + self._helper.append_op( + type="moving_average_abs_max_scale", + inputs=inputs, + outputs=outputs, + attrs=attrs) + + return scale_out diff --git 
a/python/paddle/fluid/contrib/slim/tests/test_imperative_out_scale.py b/python/paddle/fluid/contrib/slim/tests/test_imperative_out_scale.py new file mode 100644 index 00000000000..3fc8352493d --- /dev/null +++ b/python/paddle/fluid/contrib/slim/tests/test_imperative_out_scale.py @@ -0,0 +1,461 @@ +# copyright (c) 2018 paddlepaddle authors. all rights reserved. +# +# licensed under the apache license, version 2.0 (the "license"); +# you may not use this file except in compliance with the license. +# you may obtain a copy of the license at +# +# http://www.apache.org/licenses/license-2.0 +# +# unless required by applicable law or agreed to in writing, software +# distributed under the license is distributed on an "as is" basis, +# without warranties or conditions of any kind, either express or implied. +# see the license for the specific language governing permissions and +# limitations under the license. + +from __future__ import print_function + +import os +import numpy as np +import random +import unittest +import logging +import paddle +import paddle.fluid as fluid +import paddle.fluid.layers as layers +from paddle.fluid import core +from paddle.fluid.optimizer import AdamOptimizer +from paddle.fluid.framework import IrGraph +from paddle.fluid.contrib.slim.quantization import ImperativeCalcOutScale +from paddle.fluid.contrib.slim.quantization import OutScaleForTrainingPass, OutScaleForInferencePass +from paddle.fluid.dygraph.container import Sequential +from paddle.fluid.dygraph.io import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX +from paddle.nn.layer import ReLU, LeakyReLU, Sigmoid, Softmax, ReLU6 +from paddle.fluid.dygraph.nn import BatchNorm, Conv2D, Linear, Pool2D +from paddle.fluid.log_helper import get_logger + +paddle.enable_static() + +os.environ["CPU_NUM"] = "1" +if core.is_compiled_with_cuda(): + fluid.set_flags({"FLAGS_cudnn_deterministic": True}) + +_logger = get_logger( + __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s') + + +def StaticLenet(data, num_classes=10, classifier_activation='softmax'): + conv2d_w1_attr = fluid.ParamAttr(name="conv2d_w_1") + conv2d_w2_attr = fluid.ParamAttr(name="conv2d_w_2") + fc_w1_attr = fluid.ParamAttr(name="fc_w_1") + fc_w2_attr = fluid.ParamAttr(name="fc_w_2") + fc_w3_attr = fluid.ParamAttr(name="fc_w_3") + conv2d_b1_attr = fluid.ParamAttr(name="conv2d_b_1") + conv2d_b2_attr = fluid.ParamAttr(name="conv2d_b_2") + fc_b1_attr = fluid.ParamAttr(name="fc_b_1") + fc_b2_attr = fluid.ParamAttr(name="fc_b_2") + fc_b3_attr = fluid.ParamAttr(name="fc_b_3") + conv1 = fluid.layers.conv2d( + data, + num_filters=6, + filter_size=3, + stride=1, + padding=1, + param_attr=conv2d_w1_attr, + bias_attr=conv2d_b1_attr) + batch_norm1 = layers.batch_norm(conv1) + relu1 = layers.relu(batch_norm1) + pool1 = fluid.layers.pool2d( + relu1, pool_size=2, pool_type='max', pool_stride=2) + conv2 = fluid.layers.conv2d( + pool1, + num_filters=16, + filter_size=5, + stride=1, + padding=0, + param_attr=conv2d_w2_attr, + bias_attr=conv2d_b2_attr) + batch_norm2 = layers.batch_norm(conv2) + relu6_1 = layers.relu6(batch_norm2) + pool2 = fluid.layers.pool2d( + relu6_1, pool_size=2, pool_type='max', pool_stride=2) + + fc1 = fluid.layers.fc(input=pool2, + size=120, + param_attr=fc_w1_attr, + bias_attr=fc_b1_attr) + leaky_relu1 = layers.leaky_relu(fc1, alpha=0.01) + fc2 = fluid.layers.fc(input=leaky_relu1, + size=84, + param_attr=fc_w2_attr, + bias_attr=fc_b2_attr) + sigmoid1 = layers.sigmoid(fc2) + fc3 = fluid.layers.fc(input=sigmoid1, + size=num_classes, + 
act=classifier_activation, + param_attr=fc_w3_attr, + bias_attr=fc_b3_attr) + return fc3 + + +class ImperativeLenet(fluid.dygraph.Layer): + def __init__(self, num_classes=10, classifier_activation='softmax'): + super(ImperativeLenet, self).__init__() + conv2d_w1_attr = fluid.ParamAttr(name="conv2d_w_1") + conv2d_w2_attr = fluid.ParamAttr(name="conv2d_w_2") + fc_w1_attr = fluid.ParamAttr(name="fc_w_1") + fc_w2_attr = fluid.ParamAttr(name="fc_w_2") + fc_w3_attr = fluid.ParamAttr(name="fc_w_3") + conv2d_b1_attr = fluid.ParamAttr(name="conv2d_b_1") + conv2d_b2_attr = fluid.ParamAttr(name="conv2d_b_2") + fc_b1_attr = fluid.ParamAttr(name="fc_b_1") + fc_b2_attr = fluid.ParamAttr(name="fc_b_2") + fc_b3_attr = fluid.ParamAttr(name="fc_b_3") + self.features = Sequential( + Conv2D( + num_channels=1, + num_filters=6, + filter_size=3, + stride=1, + padding=1, + param_attr=conv2d_w1_attr, + bias_attr=conv2d_b1_attr), + BatchNorm(6), + ReLU(), + Pool2D( + pool_size=2, pool_type='max', pool_stride=2), + Conv2D( + num_channels=6, + num_filters=16, + filter_size=5, + stride=1, + padding=0, + param_attr=conv2d_w2_attr, + bias_attr=conv2d_b2_attr), + BatchNorm(16), + ReLU6(), + Pool2D( + pool_size=2, pool_type='max', pool_stride=2)) + + self.fc = Sequential( + Linear( + input_dim=400, + output_dim=120, + param_attr=fc_w1_attr, + bias_attr=fc_b1_attr), + LeakyReLU(), + Linear( + input_dim=120, + output_dim=84, + param_attr=fc_w2_attr, + bias_attr=fc_b2_attr), + Sigmoid(), + Linear( + input_dim=84, + act=classifier_activation, + output_dim=num_classes, + param_attr=fc_w3_attr, + bias_attr=fc_b3_attr)) + + def forward(self, inputs): + x = self.features(inputs) + + x = fluid.layers.flatten(x, 1) + x = self.fc(x) + return x + + +class TestImperativeOutSclae(unittest.TestCase): + def test_calc_out_scale_save(self): + imperative_out_scale = ImperativeCalcOutScale() + + with fluid.dygraph.guard(): + lenet = ImperativeLenet() + adam = AdamOptimizer( + learning_rate=0.001, parameter_list=lenet.parameters()) + train_reader = paddle.batch( + paddle.dataset.mnist.train(), batch_size=32, drop_last=True) + test_reader = paddle.batch( + paddle.dataset.mnist.test(), batch_size=32) + imperative_out_scale.calc_out_scale(lenet) + epoch_num = 1 + for epoch in range(epoch_num): + lenet.train() + for batch_id, data in enumerate(train_reader()): + x_data = np.array([x[0].reshape(1, 28, 28) + for x in data]).astype('float32') + y_data = np.array( + [x[1] for x in data]).astype('int64').reshape(-1, 1) + + img = fluid.dygraph.to_variable(x_data) + label = fluid.dygraph.to_variable(y_data) + out = lenet(img) + acc = fluid.layers.accuracy(out, label) + loss = fluid.layers.cross_entropy(out, label) + avg_loss = fluid.layers.mean(loss) + avg_loss.backward() + adam.minimize(avg_loss) + lenet.clear_gradients() + if batch_id % 100 == 0: + _logger.info( + "Train | At epoch {} step {}: loss = {:}, acc= {:}". + format(epoch, batch_id, + avg_loss.numpy(), acc.numpy())) + lenet.eval() + for batch_id, data in enumerate(test_reader()): + x_data = np.array([x[0].reshape(1, 28, 28) + for x in data]).astype('float32') + y_data = np.array( + [x[1] for x in data]).astype('int64').reshape(-1, 1) + + img = fluid.dygraph.to_variable(x_data) + label = fluid.dygraph.to_variable(y_data) + + out = lenet(img) + acc_top1 = fluid.layers.accuracy( + input=out, label=label, k=1) + acc_top5 = fluid.layers.accuracy( + input=out, label=label, k=5) + + if batch_id % 100 == 0: + _logger.info( + "Test | At epoch {} step {}: acc1 = {:}, acc5 = {:}". 
+ format(epoch, batch_id, + acc_top1.numpy(), acc_top5.numpy())) + + # save weights + model_dict = lenet.state_dict() + fluid.save_dygraph(model_dict, "save_temp") + + # test the correctness of `save_quantized_model` + data = next(test_reader()) + test_data = np.array([x[0].reshape(1, 28, 28) + for x in data]).astype('float32') + test_img = fluid.dygraph.to_variable(test_data) + lenet.eval() + before_save = lenet(test_img) + + # save inference quantized model + path = "./outscale_infer_model/lenet" + save_dir = "./outscale_infer_model" + imperative_out_scale.save_quantized_model( + layer=lenet, + path=path, + input_spec=[ + paddle.static.InputSpec( + shape=[None, 1, 28, 28], dtype='float32') + ]) + + if core.is_compiled_with_cuda(): + place = core.CUDAPlace(0) + else: + place = core.CPUPlace() + exe = fluid.Executor(place) + [inference_program, feed_target_names, fetch_targets] = ( + fluid.io.load_inference_model( + dirname=save_dir, + executor=exe, + model_filename="lenet" + INFER_MODEL_SUFFIX, + params_filename="lenet" + INFER_PARAMS_SUFFIX)) + after_save, = exe.run(inference_program, + feed={feed_target_names[0]: test_data}, + fetch_list=fetch_targets) + + self.assertTrue( + np.allclose(after_save, before_save.numpy()), + msg='Failed to save the inference quantized model.') + + def test_out_scale_acc(self): + def _build_static_lenet(main, startup, is_test=False, seed=1000): + with fluid.unique_name.guard(): + with fluid.program_guard(main, startup): + main.random_seed = seed + startup.random_seed = seed + img = fluid.layers.data( + name='image', shape=[1, 28, 28], dtype='float32') + label = fluid.layers.data( + name='label', shape=[1], dtype='int64') + prediction = StaticLenet(img) + if not is_test: + loss = fluid.layers.cross_entropy( + input=prediction, label=label) + avg_loss = fluid.layers.mean(loss) + else: + avg_loss = prediction + return img, label, avg_loss + + reader = paddle.batch( + paddle.dataset.mnist.test(), batch_size=32, drop_last=True) + param_init_map = {} + seed = 1000 + lr = 0.1 + dynamic_out_scale_list = [] + static_out_scale_list = [] + + # imperative train + _logger.info( + "--------------------------dynamic graph qat--------------------------" + ) + imperative_out_scale = ImperativeCalcOutScale() + + with fluid.dygraph.guard(): + np.random.seed(seed) + fluid.default_main_program().random_seed = seed + fluid.default_startup_program().random_seed = seed + lenet = ImperativeLenet() + fixed_state = {} + for name, param in lenet.named_parameters(): + p_shape = param.numpy().shape + p_value = param.numpy() + if name.endswith("bias"): + value = np.zeros_like(p_value).astype('float32') + else: + value = np.random.normal( + loc=0.0, scale=0.01, size=np.product(p_shape)).reshape( + p_shape).astype('float32') + fixed_state[name] = value + param_init_map[param.name] = value + lenet.set_dict(fixed_state) + imperative_out_scale.calc_out_scale(lenet) + adam = AdamOptimizer( + learning_rate=lr, parameter_list=lenet.parameters()) + dynamic_loss_rec = [] + lenet.train() + for batch_id, data in enumerate(reader()): + x_data = np.array([x[0].reshape(1, 28, 28) + for x in data]).astype('float32') + y_data = np.array( + [x[1] for x in data]).astype('int64').reshape(-1, 1) + + img = fluid.dygraph.to_variable(x_data) + label = fluid.dygraph.to_variable(y_data) + + out = lenet(img) + loss = fluid.layers.cross_entropy(out, label) + avg_loss = fluid.layers.mean(loss) + avg_loss.backward() + adam.minimize(avg_loss) + lenet.clear_gradients() + dynamic_loss_rec.append(avg_loss.numpy()[0]) + 
if batch_id % 100 == 0: + _logger.info('{}: {}'.format('loss', avg_loss.numpy())) + + lenet.eval() + op_object_list = (Conv2D, ReLU, ReLU6, LeakyReLU, Sigmoid, Pool2D, + BatchNorm) + + path = "./dynamic_outscale_infer_model/lenet" + save_dir = "./dynamic_outscale_infer_model" + + imperative_out_scale.save_quantized_model( + layer=lenet, + path=path, + input_spec=[ + paddle.static.InputSpec( + shape=[None, 1, 28, 28], dtype='float32') + ]) + + _logger.info( + "--------------------------static graph qat--------------------------" + ) + static_loss_rec = [] + if core.is_compiled_with_cuda(): + place = core.CUDAPlace(0) + else: + place = core.CPUPlace() + exe = fluid.Executor(place) + + main = fluid.Program() + infer = fluid.Program() + startup = fluid.Program() + static_img, static_label, static_loss = _build_static_lenet( + main, startup, False, seed) + infer_img, _, infer_pre = _build_static_lenet(infer, startup, True, + seed) + with fluid.unique_name.guard(): + with fluid.program_guard(main, startup): + opt = AdamOptimizer(learning_rate=lr) + opt.minimize(static_loss) + + scope = core.Scope() + with fluid.scope_guard(scope): + exe.run(startup) + for param in main.all_parameters(): + param_tensor = scope.var(param.name).get_tensor() + param_tensor.set(param_init_map[param.name], place) + main_graph = IrGraph(core.Graph(main.desc), for_test=False) + infer_graph = IrGraph(core.Graph(infer.desc), for_test=True) + transform_pass = OutScaleForTrainingPass(scope=scope, place=place) + transform_pass.apply(main_graph) + build_strategy = fluid.BuildStrategy() + build_strategy.fuse_all_reduce_ops = False + binary = fluid.CompiledProgram(main_graph.graph).with_data_parallel( + loss_name=static_loss.name, build_strategy=build_strategy) + + feeder = fluid.DataFeeder( + feed_list=[static_img, static_label], place=place) + with fluid.scope_guard(scope): + for batch_id, data in enumerate(reader()): + loss_v, = exe.run(binary, + feed=feeder.feed(data), + fetch_list=[static_loss]) + static_loss_rec.append(loss_v[0]) + if batch_id % 100 == 0: + _logger.info('{}: {}'.format('loss', loss_v)) + scale_inference_pass = OutScaleForInferencePass(scope=scope) + scale_inference_pass.apply(infer_graph) + + out_scale_op_list = [ + "batch_norm", "conv2d", "leaky_relu", "pool2d", "relu6", "relu", + "sigmoid", "tanh", "relu6", "softmax", "conv2d_transpose", + "elementwise_add" + ] + op_nodes = infer_graph.all_op_nodes() + for op_node in op_nodes: + if op_node.name() in out_scale_op_list: + static_out_scale_list.append(op_node.op().attr("out_threshold")) + + save_program = infer_graph.to_program() + with fluid.scope_guard(scope): + fluid.io.save_inference_model("./static_mnist", [infer_img.name], + [infer_pre], exe, save_program) + rtol = 1e-05 + atol = 1e-08 + for i, (loss_d, + loss_s) in enumerate(zip(dynamic_loss_rec, static_loss_rec)): + diff = np.abs(loss_d - loss_s) + if diff > (atol + rtol * np.abs(loss_s)): + _logger.info( + "diff({}) at {}, dynamic loss = {}, static loss = {}". 
+ format(diff, i, loss_d, loss_s)) + break + + self.assertTrue( + np.allclose( + np.array(dynamic_loss_rec), + np.array(static_loss_rec), + rtol=rtol, + atol=atol, + equal_nan=True), + msg='Failed to do the imperative qat.') + # load dynamic model + [inference_program, feed_target_names, fetch_targets] = ( + fluid.io.load_inference_model( + dirname=save_dir, + executor=exe, + model_filename="lenet" + INFER_MODEL_SUFFIX, + params_filename="lenet" + INFER_PARAMS_SUFFIX)) + + global_block = inference_program.global_block() + for op in global_block.ops: + if op.has_attr('out_threshold'): + dynamic_out_scale_list.append(op.attr('out_threshold')) + + check_list = [ + False for item in dynamic_out_scale_list + if item not in static_out_scale_list + ] + self.assertTrue(len(check_list) == 0) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/contrib/slim/tests/test_moving_average_abs_max_scale_op.py b/python/paddle/fluid/contrib/slim/tests/test_moving_average_abs_max_scale_op.py new file mode 100644 index 00000000000..c947eeb31fc --- /dev/null +++ b/python/paddle/fluid/contrib/slim/tests/test_moving_average_abs_max_scale_op.py @@ -0,0 +1,83 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import unittest +import numpy as np +import paddle +import paddle +import paddle.fluid as fluid +from paddle.fluid import core +from paddle.fluid.contrib.slim.quantization.imperative import quant_nn + +paddle.enable_static() + + +def init_data(batch_size=32, img_shape=[784], label_range=9): + np.random.seed(5) + assert isinstance(img_shape, list) + input_shape = [batch_size] + img_shape + img = np.random.random(size=input_shape).astype(np.float32) + label = np.array( + [np.random.randint(0, label_range) for _ in range(batch_size)]).reshape( + (-1, 1)).astype("int64") + return img, label + + +class TestMovingAverageAbsMaxScaleOp(unittest.TestCase): + def check_backward(self, use_cuda): + main_program = fluid.Program() + startup_program = fluid.Program() + with fluid.program_guard(main_program, startup_program): + image = fluid.layers.data( + name='image', shape=[784], dtype='float32') + label = fluid.layers.data(name='label', shape=[1], dtype='int64') + fc_tmp = fluid.layers.fc(image, size=10, act='softmax') + out_scale = quant_nn.MovingAverageAbsMaxScale( + name=fc_tmp.name, dtype=fc_tmp.dtype) + fc_tmp_1 = out_scale(fc_tmp) + cross_entropy = fluid.layers.softmax_with_cross_entropy(fc_tmp, + label) + loss = fluid.layers.reduce_mean(cross_entropy) + sgd = fluid.optimizer.SGD(learning_rate=1e-3) + sgd.minimize(loss) + + moving_average_abs_max_scale_ops = [ + op for op in main_program.blocks[0].ops + if op.type == u'moving_average_abs_max_scale' + ] + assert len( + moving_average_abs_max_scale_ops + ) == 1, "The number of moving_average_abs_max_scale_ops should be 1." 
+ + place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() + exe = fluid.Executor(place) + exe.run(startup_program) + + binary = fluid.compiler.CompiledProgram( + main_program).with_data_parallel(loss_name=loss.name) + + img, label = init_data() + feed_dict = {"image": img, "label": label} + res = exe.run(binary, feed_dict) + + def test_fw_bw(self): + if core.is_compiled_with_cuda(): + self.check_backward(use_cuda=True) + self.check_backward(use_cuda=False) + + +if __name__ == '__main__': + unittest.main() -- GitLab
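Taken together, the pieces in this patch are meant to be used the way the new tests exercise them: hook a dygraph model with `ImperativeCalcOutScale.calc_out_scale`, run it so the forward-post hooks keep updating the per-layer moving-average scales, then call `save_quantized_model` to export an inference program whose matching ops carry the `out_threshold` attribute. A minimal end-to-end sketch under the same assumptions as the tests (fluid-era dygraph APIs with `paddle.enable_static()` at module level and MNIST-shaped random data); `TinyNet`, the loop length and the save path are illustrative and not part of the patch:

    import numpy as np
    import paddle
    import paddle.fluid as fluid
    from paddle.fluid.contrib.slim.quantization import ImperativeCalcOutScale
    from paddle.fluid.dygraph.nn import Conv2D, Pool2D
    from paddle.nn.layer import ReLU

    paddle.enable_static()

    class TinyNet(fluid.dygraph.Layer):
        # A made-up model built only from layer types in the default out-scale list.
        def __init__(self):
            super(TinyNet, self).__init__()
            self.conv = Conv2D(num_channels=1, num_filters=6, filter_size=3)
            self.relu = ReLU()
            self.pool = Pool2D(pool_size=2, pool_type='max', pool_stride=2)

        def forward(self, x):
            return self.pool(self.relu(self.conv(x)))

    calc_out_scale = ImperativeCalcOutScale(moving_rate=0.9)

    with fluid.dygraph.guard():
        model = TinyNet()
        calc_out_scale.calc_out_scale(model)   # register forward-post hooks
        model.train()
        for _ in range(10):
            x = fluid.dygraph.to_variable(
                np.random.rand(32, 1, 28, 28).astype('float32'))
            model(x)                           # each forward pass updates the scales

        # Saves the jit-translated model, then re-saves the inference program with
        # `out_threshold` set on the ops that correspond to the hooked layers.
        calc_out_scale.save_quantized_model(
            layer=model,
            path="./outscale_sketch/tinynet",
            input_spec=[
                paddle.static.InputSpec(shape=[None, 1, 28, 28], dtype='float32')
            ])

Loading the saved directory with `fluid.io.load_inference_model` and scanning the ops for `op.attr('out_threshold')`, as `test_imperative_out_scale.py` does above, is the simplest way to confirm the scales were attached.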