Unverified commit ddc95a01, authored by cc, committed by GitHub

[quant] Add quant wrap for the functional API and refine QAT (#33162)

* Add wrap for the functional API
* Refine the wrapped API
* Add unit tests for quant functional layers
* Update all unit tests for dygraph QAT
Parent 92081e1d
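Before the diff itself, a minimal usage sketch of what this commit enables, pieced together from the unit tests further down: a functional op is wrapped with `paddle.nn.quant.add` so the dygraph QAT passes can find it and attach quant/scale layers. The toy model, shapes, and training-free call below are illustrative assumptions, not part of the commit.

```python
import paddle
from paddle.fluid.contrib.slim.quantization import ImperativeQuantAware

class TinyNet(paddle.nn.Layer):
    def __init__(self):
        super(TinyNet, self).__init__()
        self.conv = paddle.nn.Conv2D(1, 6, 3, padding=1)
        # functional add wrapped as a layer, so QAT can attach an output scale to it
        self.add = paddle.nn.quant.add()

    def forward(self, x):
        y = self.conv(x)
        return self.add(y, paddle.to_tensor(0.0))

qat = ImperativeQuantAware(
    weight_quantize_type='abs_max',
    activation_quantize_type='moving_average_abs_max')
net = TinyNet()
qat.quantize(net)  # replaces sublayers with their quantized wrappers in place
out = net(paddle.rand([4, 1, 28, 28]))
```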
......@@ -251,24 +251,25 @@ class ImperativeQuantizeInputs(object):
super(ImperativeQuantizeInputs, self).__init__()
self._quantizable_layer_type = tuple(
utils.quant_input_layers_map[layer]
if layer in utils.quant_input_layers_map else layer
utils.layer_name_map[layer]
if layer in utils.layer_name_map else layer
for layer in quantizable_layer_type)
for layer in self._quantizable_layer_type:
assert not isinstance(layer, str), \
assert not isinstance(layer, str) \
and layer in utils.fake_quant_input_layers, \
"%s is unspported to be quantized." % layer
quantize_type = {
'abs_max', 'moving_average_abs_max', 'channel_wise_abs_max'
}
assert weight_quantize_type in quantize_type, \
assert weight_quantize_type != 'moving_average_abs_max' \
and weight_quantize_type in quantize_type, \
"Unsupported weight_quantize_type: %s. It can only " \
"be abs_max or moving_average_abs_max or " \
"channel_wise_abs_max." % weight_quantize_type
assert activation_quantize_type != 'channel_wise_abs_max' \
and activation_quantize_type in quantize_type, \
"be abs_max or channel_wise_abs_max." % weight_quantize_type
# TODO (jc): activation_quantize_type supports range_abs_max
assert activation_quantize_type == 'moving_average_abs_max', \
"Unsupported activation_quantize_type: %s. It can " \
"only be abs_max or moving_average_abs_max now." \
"only be moving_average_abs_max now." \
% activation_quantize_type
bits_check = lambda bits: isinstance(bits, int) \
......@@ -305,30 +306,22 @@ class ImperativeQuantizeInputs(object):
assert isinstance(model, dygraph.Layer), \
"The model must be the instance of dygraph.Layer."
for name, layer in model.named_sublayers():
if not isinstance(layer, self._quantizable_layer_type) \
or (hasattr(layer, "skip_quant") \
and layer.skip_quant == True):
for name, cur_layer in model.named_sublayers():
if not isinstance(cur_layer, self._quantizable_layer_type) \
or (hasattr(cur_layer, "skip_quant") \
and cur_layer.skip_quant == True):
continue
# TODO(jc): optimize this module
last_idx = 0
idx = 0
obj = model
while idx < len(name):
if (name[idx] == '.'):
if hasattr(obj, name[last_idx:idx]):
obj = getattr(obj, name[last_idx:idx])
last_idx = idx + 1
idx += 1
target = name[last_idx:idx]
quant_layer = self._get_input_quantized_layer(layer)
setattr(obj, target, quant_layer)
parent_layer, sub_name = \
utils.find_parent_layer_and_sub_name(model, name)
cur_quant_layer = self._get_input_quantized_layer(cur_layer)
setattr(parent_layer, sub_name, cur_quant_layer)
def _get_input_quantized_layer(self, layer):
quant_layer_name = None
for key, value in utils.quant_input_layers_map.items():
for key, value in utils.layer_name_map.items():
if isinstance(layer, value):
quant_layer_name = 'Quantized' + key
break
......@@ -336,10 +329,6 @@ class ImperativeQuantizeInputs(object):
"The layer %s is unsupported to be quantized." \
% layer.full_name()
layer_with_weight = ['QuantizedConv2D', 'QuantizedLinear']
if quant_layer_name not in layer_with_weight:
quant_layer_name = 'QuantizedNoweightLayer'
return quant_nn.__dict__[quant_layer_name](layer, **self._kwargs)
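For orientation, the substitution pattern used by these `apply`/`quantize` passes, condensed into one helper: walk `named_sublayers()`, resolve the parent with `utils.find_parent_layer_and_sub_name`, and `setattr` the wrapper back onto the parent. This is a hedged sketch reusing names from the diff; the filtering is simplified for illustration.

```python
# Simplified view of the in-place substitution (assumes `utils` from this package
# and a `make_quant_layer` callable such as self._get_input_quantized_layer).
def replace_quantizable_sublayers(model, quantizable_types, make_quant_layer):
    for name, cur_layer in model.named_sublayers():
        if not isinstance(cur_layer, quantizable_types) \
                or getattr(cur_layer, "skip_quant", False):
            continue
        parent_layer, sub_name = \
            utils.find_parent_layer_and_sub_name(model, name)
        # rebinding the attribute on the parent rewires the dygraph forward path
        setattr(parent_layer, sub_name, make_quant_layer(cur_layer))
```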
......@@ -374,25 +363,21 @@ class ImperativeQuantizeOutputs(object):
assert isinstance(model, dygraph.Layer), \
"The model must be the instance of dygraph.Layer."
for name, layer in model.named_sublayers():
if not self._is_target_layer(layer):
for cur_name, cur_layer in model.named_sublayers():
if not self._is_target_layer(cur_layer):
continue
# TODO(jc): optimize this module
last_idx = 0
idx = 0
obj = model
while idx < len(name):
if (name[idx] == '.'):
if hasattr(obj, name[last_idx:idx]):
obj = getattr(obj, name[last_idx:idx])
last_idx = idx + 1
idx += 1
target = name[last_idx:idx]
quant_layer = quant_nn.__dict__["QuantizedOutputLayer"](
layer, self._moving_rate)
setattr(obj, target, quant_layer)
parent_layer, sub_name = \
utils.find_parent_layer_and_sub_name(model, cur_name)
if isinstance(cur_layer, tuple(utils.fake_quant_output_layers)):
cur_quant_layer = quant_nn.FakeQuantMAOutputScaleLayer(
cur_layer, self._moving_rate)
else:
cur_quant_layer = quant_nn.MAOutputScaleLayer(cur_layer,
self._moving_rate)
setattr(parent_layer, sub_name, cur_quant_layer)
def save_quantized_model(self, layer, path, input_spec=None, **config):
"""
......@@ -468,9 +453,18 @@ class ImperativeQuantizeOutputs(object):
"""
Whether the layer needs to calculate output scales.
"""
return isinstance(layer, utils.quant_output_layers) \
or ('quantized' in layer.full_name() and \
'quantized_noweight' not in layer.full_name())
flag = False
if isinstance(layer, dygraph.Layer):
# exclude fake_quant ops in quant_nn file
if utils.is_leaf_layer(layer) and \
not isinstance(layer, tuple(utils.fake_quant_leaf_layers)):
flag = True
# consider QuantizedConv2D and QuantizedLinear ops
if isinstance(layer, tuple(utils.fake_quant_wrap_layers)):
flag = True
if isinstance(layer, paddle.nn.quant.FloatFunctionalLayer):
flag = True
return flag
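To make the new `_is_target_layer` rule concrete, here is the expected classification for a few layer kinds under the `utils` lists introduced later in this diff. These outcomes are inferred from the predicate above and stated as illustrative expectations, not verified output.

```python
# Illustrative expectations for _is_target_layer (not an executed test):
expected = {
    "paddle.nn.ReLU()": True,                         # leaf layer that is not a fake-quant op
    "quant_nn.MovingAverageAbsMaxScale(...)": False,  # fake_quant_leaf_layers are excluded
    "quant_nn.QuantizedConv2D(conv)": True,           # listed in fake_quant_wrap_layers
    "paddle.nn.quant.add()": True,                    # a FloatFunctionalLayer
    "paddle.nn.Sequential(...)": False,               # has sublayers, so not a leaf layer
}
```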
def _save_output_scale(self, program, scope):
"""
......@@ -514,4 +508,4 @@ class ImperativeQuantizeOutputs(object):
previous_ops = [utils.find_previous_op(block, arg_name) \
for arg_name in in_op.input_arg_names]
return any(op is not None and op.type not in \
utils.fake_quantize_dequantize_types for op in previous_ops)
utils.fake_quantize_dequantize_op_types for op in previous_ops)
......@@ -22,17 +22,28 @@ from paddle.fluid.framework import in_dygraph_mode
from paddle.fluid.initializer import Constant
from paddle.fluid.data_feeder import check_variable_and_dtype
from paddle.nn import functional as F
import logging
from paddle.fluid.log_helper import get_logger
__all__ = [
'FakeQuantMovingAverage', 'FakeQuantAbsMax',
'FakeChannelWiseQuantDequantAbsMax', 'QuantizedConv2D', 'QuantizedLinear',
'QuantizedNoweightLayer', 'MovingAverageAbsMaxScale'
'FakeQuantMovingAverageAbsMax',
'FakeQuantAbsMax',
'FakeQuantChannelWiseAbsMax',
'QuantizedConv2D',
'QuantizedLinear',
'QuantizedNoweightLayer',
'MovingAverageAbsMaxScale',
'MAOutputScaleLayer',
'FakeQuantMAOutputScaleLayer',
]
_logger = get_logger(
__name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s')
class FakeQuantMovingAverage(layers.Layer):
class FakeQuantMovingAverageAbsMax(layers.Layer):
r"""
FakeQuantMovingAverage layer does the moving_average_abs_max quant and then dequant.
FakeQuantMovingAverageAbsMax layer does the moving_average_abs_max quant and then dequant.
Its computational formula is described as below:
:math:`scale = (moving\_rate*accum+max(abs(x)))/(moving\_rate*state+1)`
......@@ -45,7 +56,7 @@ class FakeQuantMovingAverage(layers.Layer):
moving_rate=0.9,
quant_bits=8,
dtype='float32'):
super(FakeQuantMovingAverage, self).__init__()
super(FakeQuantMovingAverageAbsMax, self).__init__()
self._moving_rate = moving_rate
self._quant_bits = quant_bits
......@@ -98,7 +109,7 @@ class FakeQuantMovingAverage(layers.Layer):
return out
check_variable_and_dtype(input, 'input', ['float32'],
"FakeQuantMovingAverage")
"FakeQuantMovingAverageAbsMax")
attrs = {
'moving_rate': self._moving_rate,
'bit_length': self._quant_bits,
......@@ -210,7 +221,7 @@ class FakeQuantAbsMax(layers.Layer):
return quant_out
class FakeChannelWiseQuantDequantAbsMax(layers.Layer):
class FakeQuantChannelWiseAbsMax(layers.Layer):
def __init__(self,
name=None,
channel_num=None,
......@@ -219,7 +230,7 @@ class FakeChannelWiseQuantDequantAbsMax(layers.Layer):
dtype='float32',
quant_on_weight=False):
assert quant_on_weight == True, "Channel-wise quantization can only be applied to weights."
super(FakeChannelWiseQuantDequantAbsMax, self).__init__()
super(FakeQuantChannelWiseAbsMax, self).__init__()
self._quant_bits = quant_bits
self._quant_axis = quant_axis
self._dtype = dtype
......@@ -265,7 +276,7 @@ class FakeChannelWiseQuantDequantAbsMax(layers.Layer):
return out
check_variable_and_dtype(input, 'input', ['float32'],
"FakeChannelWiseQuantDequantAbsMax")
"FakeQuantChannelWiseAbsMax")
attrs = {'bit_length': self._quant_bits, 'quant_axis': self._quant_axis}
inputs = {"X": [input]}
quant_out = self._helper.create_variable(
......@@ -313,8 +324,8 @@ def _get_fake_quant_type(quant_type, **kwargs):
"when you use channel_wise_abs_max strategy.")
fake_quant_map = {
'abs_max': FakeQuantAbsMax,
'moving_average_abs_max': FakeQuantMovingAverage,
'channel_wise_abs_max': FakeChannelWiseQuantDequantAbsMax
'moving_average_abs_max': FakeQuantMovingAverageAbsMax,
'channel_wise_abs_max': FakeQuantChannelWiseAbsMax
}
return fake_quant_map[quant_type](**call_args)
......@@ -498,12 +509,7 @@ class QuantizedNoweightLayer(layers.Layer):
quant_on_weight=False)
def forward(self, input):
quant_input = self._fake_quant_input(input)
# TODO (jc): support ops that have several inputs
if isinstance(input, list):
assert len(input) == 1, \
"The QuantizedNoweightLayer should only have one input."
return self._layer.forward(quant_input)
return self._layer.forward(self._fake_quant_input(input))
class MovingAverageAbsMaxScale(layers.Layer):
......@@ -590,19 +596,56 @@ class MovingAverageAbsMaxScale(layers.Layer):
return quant_out
class QuantizedOutputLayer(layers.Layer):
def __init__(self, layer=None, moving_rate=0.9, dtype='float32'):
class MAOutputScaleLayer(layers.Layer):
"""
Calculate the scale (moving average abs max) for the output of the input layer.
To do so, a MovingAverageAbsMaxScale layer is appended after the input layer.
"""
def __init__(self, layer=None, moving_rate=0.9, name=None, dtype='float32'):
r"""
Add MovingAverageMaxScale layer to the behind of the input layer.
Construct
"""
super(QuantizedOutputLayer, self).__init__()
super(MAOutputScaleLayer, self).__init__()
self._layer = layer
self._moving_average_abs_max_scale = \
MovingAverageAbsMaxScale(layer.full_name(), moving_rate, dtype)
if name is None:
name = layer.full_name()
self._ma_output_scale = \
MovingAverageAbsMaxScale(name, moving_rate, dtype)
def forward(self, *inputs, **kwargs):
out = self._layer(*inputs, **kwargs)
# TODO (jc): support the ops of several outputs
if (isinstance(out, list) or isinstance(out, tuple)) and len(out) > 1:
return out
else:
return self._ma_output_scale(out)
def forward(self, input):
if isinstance(input, list):
assert len(input) == 1, \
"The QuantizedOutputLayer should only have one input."
out = self._layer(input)
return self._moving_average_abs_max_scale(out)
class FakeQuantMAOutputScaleLayer(layers.Layer):
def __init__(self,
layer,
weight_bits=8,
activation_bits=8,
moving_rate=0.9,
name=None,
*args,
**kwargs):
super(FakeQuantMAOutputScaleLayer, self).__init__()
self._layer = layer
self._fake_quant_output = _get_fake_quant_type(
'moving_average_abs_max',
name=layer.full_name() if name is None else name,
moving_rate=moving_rate,
quant_bits=activation_bits,
dtype=self._dtype,
quant_on_weight=False)
def forward(self, *inputs, **kwargs):
out = self._layer(*inputs, **kwargs)
# TODO (jc): support the ops of several outputs
if (isinstance(out, list) or isinstance(out, tuple)) and len(out) > 1:
return out
else:
return self._fake_quant_output(out)
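Taken together, `MAOutputScaleLayer` only records a moving-average abs-max scale for the wrapped layer's output, while `FakeQuantMAOutputScaleLayer` additionally fake-quantizes that output; `ImperativeQuantizeOutputs.apply` picks the latter for the functional wrappers (`paddle.nn.quant.add` and friends). A condensed sketch of that branch, with argument passing simplified to keywords as an assumption for clarity:

```python
# Sketch of the wrapper selection done in ImperativeQuantizeOutputs.apply
# (mirrors the branch in the diff above; not the verbatim implementation).
def wrap_for_output_scale(cur_layer, moving_rate=0.9):
    if isinstance(cur_layer, tuple(utils.fake_quant_output_layers)):
        # functional ops (add/subtract/multiply/divide) get fake-quant + scale
        return quant_nn.FakeQuantMAOutputScaleLayer(
            cur_layer, moving_rate=moving_rate)
    # ordinary layers only get a MovingAverageAbsMaxScale appended to the output
    return quant_nn.MAOutputScaleLayer(cur_layer, moving_rate=moving_rate)
```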
......@@ -13,9 +13,11 @@
# limitations under the License.
import paddle
from paddle.fluid import dygraph
import numpy as np
from . import quant_nn
quant_input_layers_map = {
layer_name_map = {
'Conv2D': paddle.nn.Conv2D,
'Linear': paddle.nn.Linear,
'AdaptiveAvgPool2D': paddle.nn.AdaptiveAvgPool2D,
......@@ -37,30 +39,38 @@ quant_input_layers_map = {
'LayerNorm': paddle.nn.LayerNorm,
}
fake_quantize_dequantize_types = [
"fake_quantize_dequantize_abs_max",
"fake_channel_wise_quantize_dequantize_abs_max",
"fake_quantize_dequantize_moving_average_abs_max"
# Apply fake quant for the inputs of these layers
# TODO (jc): support paddle.nn.Conv2DTranspose
fake_quant_input_layers = [paddle.nn.Conv2D, paddle.nn.Linear]
# Apply fake quant for the output of these layers
# TODO(jc): fix the problem of adding duplicate fake_quant ops
# paddle.nn.AdaptiveAvgPool2D, paddle.nn.AvgPool2D, paddle.nn.ReLU, paddle.nn.LeakyReLU
fake_quant_output_layers = [
paddle.nn.quant.add, paddle.nn.quant.subtract, paddle.nn.quant.multiply,
paddle.nn.quant.divide
]
fake_quant_leaf_layers = [
quant_nn.FakeQuantAbsMax,
quant_nn.FakeQuantChannelWiseAbsMax,
quant_nn.FakeQuantMovingAverageAbsMax,
quant_nn.MovingAverageAbsMaxScale,
]
quant_output_layers = (
paddle.nn.Conv2D, paddle.nn.Conv2DTranspose, paddle.nn.Linear,
paddle.nn.AdaptiveAvgPool2D, paddle.nn.AdaptiveMaxPool2D,
paddle.nn.AvgPool2D, paddle.nn.MaxPool2D, paddle.nn.BatchNorm,
paddle.nn.BatchNorm2D, paddle.nn.LayerNorm, paddle.nn.SyncBatchNorm,
paddle.nn.ELU, paddle.nn.GELU, paddle.nn.Hardshrink, paddle.nn.Hardsigmoid,
paddle.nn.Hardswish, paddle.nn.Hardtanh, paddle.nn.LeakyReLU,
paddle.nn.LogSigmoid, paddle.nn.LogSoftmax, paddle.nn.Maxout,
paddle.nn.PReLU, paddle.nn.ReLU, paddle.nn.ReLU6, paddle.nn.SELU,
paddle.nn.Sigmoid, paddle.nn.Softmax, paddle.nn.Softplus,
paddle.nn.Softshrink, paddle.nn.Softsign, paddle.nn.Swish, paddle.nn.Tanh,
paddle.nn.Tanhshrink, paddle.nn.ThresholdedReLU, paddle.nn.Upsample)
fake_quant_wrap_layers = [quant_nn.QuantizedConv2D, quant_nn.QuantizedLinear]
weight_op_types = [
"conv2d", "depthwise_conv2d", "matmul", "conv2d_transpose",
"depthwise_conv2d_transpose"
]
fake_quantize_dequantize_op_types = [
"fake_quantize_dequantize_abs_max",
"fake_channel_wise_quantize_dequantize_abs_max",
"fake_quantize_dequantize_moving_average_abs_max"
]
def load_variable_data(scope, var_name):
'''
......@@ -90,3 +100,36 @@ def find_next_ops(block, var_name):
if var_name in op.input_arg_names:
res_ops.append(op)
return res_ops
def find_parent_layer_and_sub_name(model, name):
"""
Given the model and the name of a layer, find the parent layer and
the sub_name of the layer.
For example, if name is 'block_1.convbn_1.conv_1', the parent layer is
the sublayer named 'block_1.convbn_1' and the sub_name is `conv_1`.
"""
assert isinstance(model, dygraph.Layer), \
"The model must be the instance of paddle.nn.Layer."
assert len(name) > 0, "The input (name) should not be empty."
last_idx = 0
idx = 0
parent_layer = model
while idx < len(name):
if name[idx] == '.':
sub_name = name[last_idx:idx]
if hasattr(parent_layer, sub_name):
parent_layer = getattr(parent_layer, sub_name)
last_idx = idx + 1
idx += 1
sub_name = name[last_idx:idx]
return parent_layer, sub_name
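A quick illustration of the lookup above, using a dotted sublayer name of the form produced by `named_sublayers()`. The tiny model is an assumption made up for this example, and `find_parent_layer_and_sub_name` is the function defined just above.

```python
import paddle

class Demo(paddle.nn.Layer):
    def __init__(self):
        super(Demo, self).__init__()
        self.block = paddle.nn.Sequential(paddle.nn.Conv2D(1, 6, 3))

model = Demo()
# named_sublayers() yields dotted names such as 'block' and 'block.0'
parent, sub_name = find_parent_layer_and_sub_name(model, 'block.0')
assert parent is model.block and sub_name == '0'
# rebinding the attribute on the parent is how the quantize passes swap layers in place
setattr(parent, sub_name, paddle.nn.ReLU())
```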
def is_leaf_layer(layer):
"""
Whether the layer is a leaf layer, i.e. it has no sublayers.
"""
return isinstance(layer, dygraph.Layer) \
and len(layer.sublayers()) == 0
......@@ -270,12 +270,6 @@ list(REMOVE_ITEM TEST_OPS
#TODO(wanghaoshuang): Fix this unitest failed on GCC8.
LIST(REMOVE_ITEM TEST_OPS test_auto_pruning)
LIST(REMOVE_ITEM TEST_OPS test_filter_pruning)
# only tests on a single GPU environment
LIST(REMOVE_ITEM TEST_OPS test_imperative_qat_addquantdequant)
py_test_modules(test_imperative_qat_addquantdequant MODULES test_imperative_qat_addquantdequant ENVS
CUDA_VISIBLE_DEVICES=0)
# fix
if(WIN32)
......@@ -313,7 +307,6 @@ set_tests_properties(test_quantization_pass PROPERTIES TIMEOUT 120)
set_tests_properties(test_imperative_qat_channelwise PROPERTIES TIMEOUT 120)
set_tests_properties(test_user_defined_quantization PROPERTIES TIMEOUT 120)
set_tests_properties(test_imperative_qat PROPERTIES TIMEOUT 120)
set_tests_properties(test_imperative_qat_addquantdequant PROPERTIES TIMEOUT 120)
set_tests_properties(test_imperative_out_scale PROPERTIES TIMEOUT 120)
if(LINUX AND WITH_MKLDNN)
set_tests_properties(test_quant2_int8_mobilenetv1_mkldnn PROPERTIES TIMEOUT 120)
......
# copyright (c) 2021 paddlepaddle authors. all rights reserved.
#
# licensed under the apache license, version 2.0 (the "license");
# you may not use this file except in compliance with the license.
# you may obtain a copy of the license at
#
# http://www.apache.org/licenses/license-2.0
#
# unless required by applicable law or agreed to in writing, software
# distributed under the license is distributed on an "as is" basis,
# without warranties or conditions of any kind, either express or implied.
# see the license for the specific language governing permissions and
# limitations under the license.
import numpy as np
import logging
import paddle
import paddle.fluid as fluid
from paddle.fluid import core
from paddle.fluid.dygraph.container import Sequential
from paddle.nn import ReLU, ReLU6, LeakyReLU, Sigmoid, Softmax, PReLU
from paddle.nn import Linear, Conv2D, Softmax, BatchNorm2D, MaxPool2D
from paddle.fluid.log_helper import get_logger
_logger = get_logger(
__name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s')
def fix_model_dict(model):
fixed_state = {}
for name, param in model.named_parameters():
p_shape = param.numpy().shape
p_value = param.numpy()
if name.endswith("bias"):
value = np.zeros_like(p_value).astype('float32')
else:
value = np.random.normal(
loc=0.0, scale=0.01,
size=np.product(p_shape)).reshape(p_shape).astype('float32')
fixed_state[name] = value
model.set_dict(fixed_state)
return model
def train_lenet(lenet, reader, optimizer):
loss_list = []
lenet.train()
for batch_id, data in enumerate(reader()):
x_data = np.array([x[0].reshape(1, 28, 28)
for x in data]).astype('float32')
y_data = np.array([x[1] for x in data]).astype('int64').reshape(-1, 1)
img = paddle.to_tensor(x_data)
label = paddle.to_tensor(y_data)
out = lenet(img)
loss = fluid.layers.cross_entropy(out, label)
avg_loss = fluid.layers.mean(loss)
avg_loss.backward()
optimizer.minimize(avg_loss)
lenet.clear_gradients()
if batch_id % 100 == 0:
loss_list.append(avg_loss.numpy()[0])
_logger.info('{}: {}'.format('loss', avg_loss.numpy()))
return loss_list
class ImperativeLenet(fluid.dygraph.Layer):
def __init__(self, num_classes=10):
super(ImperativeLenet, self).__init__()
conv2d_w1_attr = fluid.ParamAttr(name="conv2d_w_1")
conv2d_w2_attr = fluid.ParamAttr(name="conv2d_w_2")
fc_w1_attr = fluid.ParamAttr(name="fc_w_1")
fc_w2_attr = fluid.ParamAttr(name="fc_w_2")
fc_w3_attr = fluid.ParamAttr(name="fc_w_3")
conv2d_b2_attr = fluid.ParamAttr(name="conv2d_b_2")
fc_b1_attr = fluid.ParamAttr(name="fc_b_1")
fc_b2_attr = fluid.ParamAttr(name="fc_b_2")
fc_b3_attr = fluid.ParamAttr(name="fc_b_3")
self.features = Sequential(
Conv2D(
in_channels=1,
out_channels=6,
kernel_size=3,
stride=1,
padding=1,
weight_attr=conv2d_w1_attr,
bias_attr=False),
BatchNorm2D(6),
ReLU(),
MaxPool2D(
kernel_size=2, stride=2),
Conv2D(
in_channels=6,
out_channels=16,
kernel_size=5,
stride=1,
padding=0,
weight_attr=conv2d_w2_attr,
bias_attr=conv2d_b2_attr),
BatchNorm2D(16),
PReLU(),
MaxPool2D(
kernel_size=2, stride=2))
self.fc = Sequential(
Linear(
in_features=400,
out_features=120,
weight_attr=fc_w1_attr,
bias_attr=fc_b1_attr),
LeakyReLU(),
Linear(
in_features=120,
out_features=84,
weight_attr=fc_w2_attr,
bias_attr=fc_b2_attr),
Sigmoid(),
Linear(
in_features=84,
out_features=num_classes,
weight_attr=fc_w3_attr,
bias_attr=fc_b3_attr),
Softmax())
self.add = paddle.nn.quant.add()
def forward(self, inputs):
x = self.features(inputs)
x = fluid.layers.flatten(x, 1)
x = self.add(x, paddle.to_tensor(0.0)) # For CI
x = self.fc(x)
return x
class ImperativeLenetWithSkipQuant(fluid.dygraph.Layer):
def __init__(self, num_classes=10):
super(ImperativeLenetWithSkipQuant, self).__init__()
conv2d_w1_attr = fluid.ParamAttr(name="conv2d_w_1")
conv2d_w2_attr = fluid.ParamAttr(name="conv2d_w_2")
fc_w1_attr = fluid.ParamAttr(name="fc_w_1")
fc_w2_attr = fluid.ParamAttr(name="fc_w_2")
fc_w3_attr = fluid.ParamAttr(name="fc_w_3")
conv2d_b1_attr = fluid.ParamAttr(name="conv2d_b_1")
conv2d_b2_attr = fluid.ParamAttr(name="conv2d_b_2")
fc_b1_attr = fluid.ParamAttr(name="fc_b_1")
fc_b2_attr = fluid.ParamAttr(name="fc_b_2")
fc_b3_attr = fluid.ParamAttr(name="fc_b_3")
self.conv2d_0 = Conv2D(
in_channels=1,
out_channels=6,
kernel_size=3,
stride=1,
padding=1,
weight_attr=conv2d_w1_attr,
bias_attr=conv2d_b1_attr)
self.conv2d_0.skip_quant = True
self.batch_norm_0 = BatchNorm2D(6)
self.relu_0 = ReLU()
self.pool2d_0 = MaxPool2D(kernel_size=2, stride=2)
self.conv2d_1 = Conv2D(
in_channels=6,
out_channels=16,
kernel_size=5,
stride=1,
padding=0,
weight_attr=conv2d_w2_attr,
bias_attr=conv2d_b2_attr)
self.conv2d_1.skip_quant = False
self.batch_norm_1 = BatchNorm2D(16)
self.relu6_0 = ReLU6()
self.pool2d_1 = MaxPool2D(kernel_size=2, stride=2)
self.linear_0 = Linear(
in_features=400,
out_features=120,
weight_attr=fc_w1_attr,
bias_attr=fc_b1_attr)
self.linear_0.skip_quant = True
self.leaky_relu_0 = LeakyReLU()
self.linear_1 = Linear(
in_features=120,
out_features=84,
weight_attr=fc_w2_attr,
bias_attr=fc_b2_attr)
self.linear_1.skip_quant = False
self.sigmoid_0 = Sigmoid()
self.linear_2 = Linear(
in_features=84,
out_features=num_classes,
weight_attr=fc_w3_attr,
bias_attr=fc_b3_attr)
self.linear_2.skip_quant = False
self.softmax_0 = Softmax()
def forward(self, inputs):
x = self.conv2d_0(inputs)
x = self.batch_norm_0(x)
x = self.relu_0(x)
x = self.pool2d_0(x)
x = self.conv2d_1(x)
x = self.batch_norm_1(x)
x = self.relu6_0(x)
x = self.pool2d_1(x)
x = fluid.layers.flatten(x, 1)
x = self.linear_0(x)
x = self.leaky_relu_0(x)
x = self.linear_1(x)
x = self.sigmoid_0(x)
x = self.linear_2(x)
x = self.softmax_0(x)
return x
......@@ -28,7 +28,6 @@ from paddle.fluid import core
from paddle.fluid.optimizer import AdamOptimizer
from paddle.fluid.framework import IrGraph
from paddle.fluid.contrib.slim.quantization import ImperativeQuantAware
from paddle.fluid.contrib.slim.quantization import OutScaleForTrainingPass, OutScaleForInferencePass, QuantizationTransformPass
from paddle.fluid.dygraph.container import Sequential
from paddle.fluid.dygraph.io import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX
from paddle.nn.layer import ReLU, LeakyReLU, Sigmoid, Softmax, PReLU
......@@ -36,6 +35,8 @@ from paddle.nn import Linear, Conv2D, Softmax, BatchNorm2D, MaxPool2D
from paddle.fluid.log_helper import get_logger
from paddle.fluid.dygraph import nn
from imperative_test_utils import fix_model_dict, train_lenet, ImperativeLenet
paddle.enable_static()
os.environ["CPU_NUM"] = "1"
......@@ -54,59 +55,6 @@ def get_vaild_warning_num(warning, w):
return num
def StaticLenet(data, num_classes=10, classifier_activation='softmax'):
conv2d_w1_attr = fluid.ParamAttr(name="conv2d_w_1")
conv2d_w2_attr = fluid.ParamAttr(name="conv2d_w_2")
fc_w1_attr = fluid.ParamAttr(name="fc_w_1")
fc_w2_attr = fluid.ParamAttr(name="fc_w_2")
fc_w3_attr = fluid.ParamAttr(name="fc_w_3")
conv2d_b2_attr = fluid.ParamAttr(name="conv2d_b_2")
fc_b1_attr = fluid.ParamAttr(name="fc_b_1")
fc_b2_attr = fluid.ParamAttr(name="fc_b_2")
fc_b3_attr = fluid.ParamAttr(name="fc_b_3")
conv1 = fluid.layers.conv2d(
data,
num_filters=6,
filter_size=3,
stride=1,
padding=1,
param_attr=conv2d_w1_attr,
bias_attr=False)
batch_norm1 = layers.batch_norm(conv1)
relu1 = layers.relu(batch_norm1)
pool1 = fluid.layers.pool2d(
relu1, pool_size=2, pool_type='max', pool_stride=2)
conv2 = fluid.layers.conv2d(
pool1,
num_filters=16,
filter_size=5,
stride=1,
padding=0,
param_attr=conv2d_w2_attr,
bias_attr=conv2d_b2_attr)
batch_norm2 = layers.batch_norm(conv2)
prelu1 = layers.prelu(batch_norm2, mode='all')
pool2 = fluid.layers.pool2d(
prelu1, pool_size=2, pool_type='max', pool_stride=2)
fc1 = fluid.layers.fc(input=pool2,
size=120,
param_attr=fc_w1_attr,
bias_attr=fc_b1_attr)
leaky_relu1 = layers.leaky_relu(fc1, alpha=0.01)
fc2 = fluid.layers.fc(input=leaky_relu1,
size=84,
param_attr=fc_w2_attr,
bias_attr=fc_b2_attr)
sigmoid1 = layers.sigmoid(fc2)
fc3 = fluid.layers.fc(input=sigmoid1,
size=num_classes,
param_attr=fc_w3_attr,
bias_attr=fc_b3_attr)
softmax1 = layers.softmax(fc3, use_cudnn=True)
return softmax1
class ImperativeLenet(fluid.dygraph.Layer):
def __init__(self, num_classes=10):
super(ImperativeLenet, self).__init__()
......@@ -175,38 +123,11 @@ class ImperativeLenet(fluid.dygraph.Layer):
class TestImperativeOutSclae(unittest.TestCase):
def test_out_scale_acc(self):
def _build_static_lenet(main, startup, is_test=False, seed=1000):
with fluid.unique_name.guard():
with fluid.program_guard(main, startup):
main.random_seed = seed
startup.random_seed = seed
img = fluid.layers.data(
name='image', shape=[1, 28, 28], dtype='float32')
label = fluid.layers.data(
name='label', shape=[1], dtype='int64')
prediction = StaticLenet(img)
if not is_test:
loss = fluid.layers.cross_entropy(
input=prediction, label=label)
avg_loss = fluid.layers.mean(loss)
else:
avg_loss = prediction
return img, label, avg_loss
reader = paddle.batch(
paddle.dataset.mnist.test(), batch_size=32, drop_last=True)
weight_quantize_type = 'abs_max'
activation_quantize_type = 'moving_average_abs_max'
param_init_map = {}
seed = 1000
lr = 0.001
dynamic_out_scale_list = []
static_out_scale_list = []
# imperative train
_logger.info(
"--------------------------dynamic graph qat--------------------------"
)
weight_quantize_type = 'abs_max'
activation_quantize_type = 'moving_average_abs_max'
imperative_out_scale = ImperativeQuantAware(
weight_quantize_type=weight_quantize_type,
activation_quantize_type=activation_quantize_type)
......@@ -215,207 +136,46 @@ class TestImperativeOutSclae(unittest.TestCase):
np.random.seed(seed)
fluid.default_main_program().random_seed = seed
fluid.default_startup_program().random_seed = seed
lenet = ImperativeLenet()
fixed_state = {}
for name, param in lenet.named_parameters():
p_shape = param.numpy().shape
p_value = param.numpy()
if name.endswith("bias"):
value = np.zeros_like(p_value).astype('float32')
else:
value = np.random.normal(
loc=0.0, scale=0.01, size=np.product(p_shape)).reshape(
p_shape).astype('float32')
fixed_state[name] = value
param_init_map[param.name] = value
lenet.set_dict(fixed_state)
lenet = fix_model_dict(lenet)
imperative_out_scale.quantize(lenet)
reader = paddle.batch(
paddle.dataset.mnist.test(), batch_size=32, drop_last=True)
adam = AdamOptimizer(
learning_rate=lr, parameter_list=lenet.parameters())
dynamic_loss_rec = []
lenet.train()
for batch_id, data in enumerate(reader()):
x_data = np.array([x[0].reshape(1, 28, 28)
for x in data]).astype('float32')
y_data = np.array(
[x[1] for x in data]).astype('int64').reshape(-1, 1)
img = fluid.dygraph.to_variable(x_data)
label = fluid.dygraph.to_variable(y_data)
out = lenet(img)
loss = fluid.layers.cross_entropy(out, label)
avg_loss = fluid.layers.mean(loss)
avg_loss.backward()
adam.minimize(avg_loss)
lenet.clear_gradients()
dynamic_loss_rec.append(avg_loss.numpy()[0])
if batch_id % 100 == 0:
_logger.info('{}: {}'.format('loss', avg_loss.numpy()))
loss_list = train_lenet(lenet, reader, adam)
lenet.eval()
param_save_path = "test_save_quantized_model/lenet.pdparams"
save_dict = lenet.state_dict()
paddle.save(save_dict, param_save_path)
path = "./dynamic_outscale_infer_model/lenet"
dynamic_save_dir = "./dynamic_outscale_infer_model"
save_path = "./dynamic_outscale_infer_model/lenet"
imperative_out_scale.save_quantized_model(
layer=lenet,
path=path,
path=save_path,
input_spec=[
paddle.static.InputSpec(
shape=[None, 1, 28, 28], dtype='float32')
])
_logger.info(
"--------------------------static graph qat--------------------------"
)
static_loss_rec = []
if core.is_compiled_with_cuda():
place = core.CUDAPlace(0)
else:
place = core.CPUPlace()
exe = fluid.Executor(place)
main = fluid.Program()
infer = fluid.Program()
startup = fluid.Program()
static_img, static_label, static_loss = _build_static_lenet(
main, startup, False, seed)
infer_img, _, infer_pre = _build_static_lenet(infer, startup, True,
seed)
with fluid.unique_name.guard():
with fluid.program_guard(main, startup):
opt = AdamOptimizer(learning_rate=lr)
opt.minimize(static_loss)
scope = core.Scope()
with fluid.scope_guard(scope):
exe.run(startup)
for param in main.all_parameters():
if "batch_norm" in param.name:
param_name = param.name.replace("norm", "norm2d")
elif 'prelu' in param.name:
param_name = param.name.replace("prelu", 'p_re_lu')
else:
param_name = param.name
param_tensor = scope.var(param.name).get_tensor()
param_tensor.set(param_init_map[param_name], place)
main_graph = IrGraph(core.Graph(main.desc), for_test=False)
infer_graph = IrGraph(core.Graph(infer.desc), for_test=True)
transform_pass = QuantizationTransformPass(
scope=scope,
place=place,
activation_quantize_type=activation_quantize_type,
weight_quantize_type=weight_quantize_type,
quantizable_op_type=['conv2d', 'depthwise_conv2d', 'mul'])
transform_pass.apply(main_graph)
transform_pass.apply(infer_graph)
outscale_pass = OutScaleForTrainingPass(scope=scope, place=place)
outscale_pass.apply(main_graph)
build_strategy = fluid.BuildStrategy()
build_strategy.fuse_all_reduce_ops = False
binary = fluid.CompiledProgram(main_graph.graph).with_data_parallel(
loss_name=static_loss.name, build_strategy=build_strategy)
feeder = fluid.DataFeeder(
feed_list=[static_img, static_label], place=place)
with fluid.scope_guard(scope):
for batch_id, data in enumerate(reader()):
loss_v, = exe.run(binary,
feed=feeder.feed(data),
fetch_list=[static_loss])
static_loss_rec.append(loss_v[0])
if batch_id % 100 == 0:
_logger.info('{}: {}'.format('loss', loss_v))
scale_inference_pass = OutScaleForInferencePass(scope=scope)
scale_inference_pass.apply(infer_graph)
save_program = infer_graph.to_program()
static_save_dir = "./static_outscale_infer_model"
with fluid.scope_guard(scope):
fluid.io.save_inference_model(
dirname=static_save_dir,
feeded_var_names=[infer_img.name],
target_vars=[infer_pre],
executor=exe,
main_program=save_program,
model_filename="lenet" + INFER_MODEL_SUFFIX,
params_filename="lenet" + INFER_PARAMS_SUFFIX)
rtol = 1e-05
atol = 1e-08
for i, (loss_d,
loss_s) in enumerate(zip(dynamic_loss_rec, static_loss_rec)):
diff = np.abs(loss_d - loss_s)
if diff > (atol + rtol * np.abs(loss_s)):
_logger.info(
"diff({}) at {}, dynamic loss = {}, static loss = {}".
format(diff, i, loss_d, loss_s))
break
self.assertTrue(
np.allclose(
np.array(dynamic_loss_rec),
np.array(static_loss_rec),
rtol=rtol,
atol=atol,
equal_nan=True),
msg='Failed to do the imperative qat.')
# load dynamic model
[dynamic_inference_program, feed_target_names, fetch_targets] = (
fluid.io.load_inference_model(
dirname=dynamic_save_dir,
executor=exe,
model_filename="lenet" + INFER_MODEL_SUFFIX,
params_filename="lenet" + INFER_PARAMS_SUFFIX))
# load static model
[static_inference_program, feed_target_names, fetch_targets] = (
fluid.io.load_inference_model(
dirname=static_save_dir,
executor=exe,
model_filename="lenet" + INFER_MODEL_SUFFIX,
params_filename="lenet" + INFER_PARAMS_SUFFIX))
dynamic_ops = dynamic_inference_program.global_block().ops
static_ops = static_inference_program.global_block().ops
for op in dynamic_ops[:]:
if op.type == "flatten2" or 'fake' in op.type:
dynamic_ops.remove(op)
for op in static_ops[:]:
if 'fake' in op.type:
static_ops.remove(op)
op_count = 0
for i in range(len(dynamic_ops)):
if dynamic_ops[i].has_attr("out_threshold"):
op_count += 1
self.assertTrue(dynamic_ops[i].type == static_ops[i].type)
if dynamic_ops[i].attr("out_threshold") != static_ops[i].attr(
"out_threshold"):
_logger.info(dynamic_ops[i].attr("out_threshold"))
_logger.info(static_ops[i].attr("out_threshold"))
self.assertTrue(dynamic_ops[i].attr("out_threshold") ==
static_ops[i].attr("out_threshold"))
_logger.info("op_cout: {}".format(op_count))
self.assertTrue(op_count == 14)
for i in range(len(loss_list) - 1):
self.assertTrue(
loss_list[i] > loss_list[i + 1],
msg='Failed to do the imperative qat.')
class TestSaveQuanztizedModelFromCheckPoint(unittest.TestCase):
def test_save_quantized_model(self):
weight_quantize_type = 'abs_max'
activation_quantize_type = 'moving_average_abs_max'
lr = 0.001
load_param_path = "test_save_quantized_model/lenet.pdparams"
path = "./dynamic_outscale_infer_model_from_checkpoint/lenet"
dynamic_model_save_dir = "./dynamic_outscale_infer_model_from_checkpoint"
static_model_save_dir = "./static_outscale_infer_model"
save_path = "./dynamic_outscale_infer_model_from_checkpoint/lenet"
weight_quantize_type = 'abs_max'
activation_quantize_type = 'moving_average_abs_max'
imperative_out_scale = ImperativeQuantAware(
weight_quantize_type=weight_quantize_type,
activation_quantize_type=activation_quantize_type)
......@@ -426,56 +186,25 @@ class TestSaveQuanztizedModelFromCheckPoint(unittest.TestCase):
imperative_out_scale.quantize(lenet)
lenet.set_dict(load_dict)
reader = paddle.batch(
paddle.dataset.mnist.test(), batch_size=32, drop_last=True)
adam = AdamOptimizer(
learning_rate=lr, parameter_list=lenet.parameters())
loss_list = train_lenet(lenet, reader, adam)
lenet.eval()
imperative_out_scale.save_quantized_model(
layer=lenet,
path=path,
path=save_path,
input_spec=[
paddle.static.InputSpec(
shape=[None, 1, 28, 28], dtype='float32')
])
if core.is_compiled_with_cuda():
place = core.CUDAPlace(0)
else:
place = core.CPUPlace()
exe = fluid.Executor(place)
# load dynamic model
[dynamic_inference_program, feed_target_names, fetch_targets] = (
fluid.io.load_inference_model(
dirname=dynamic_model_save_dir,
executor=exe,
model_filename="lenet" + INFER_MODEL_SUFFIX,
params_filename="lenet" + INFER_PARAMS_SUFFIX))
# load static model
[static_inference_program, feed_target_names, fetch_targets] = (
fluid.io.load_inference_model(
dirname=static_model_save_dir,
executor=exe,
model_filename="lenet" + INFER_MODEL_SUFFIX,
params_filename="lenet" + INFER_PARAMS_SUFFIX))
dynamic_ops = dynamic_inference_program.global_block().ops
static_ops = static_inference_program.global_block().ops
for op in dynamic_ops[:]:
if op.type == "flatten2" or 'fake' in op.type:
dynamic_ops.remove(op)
for op in static_ops[:]:
if 'fake' in op.type:
static_ops.remove(op)
op_count = 0
for i in range(len(dynamic_ops)):
if dynamic_ops[i].has_attr("out_threshold"):
op_count += 1
self.assertTrue(dynamic_ops[i].type == static_ops[i].type)
self.assertTrue(dynamic_ops[i].attr("out_threshold") ==
static_ops[i].attr("out_threshold"))
_logger.info("op_cout: {}".format(op_count))
self.assertTrue(op_count == 14)
for i in range(len(loss_list) - 1):
self.assertTrue(
loss_list[i] > loss_list[i + 1],
msg='Failed to do the imperative qat.')
if __name__ == '__main__':
......
......@@ -21,20 +21,20 @@ import shutil
import time
import unittest
import logging
import paddle
import paddle.fluid as fluid
from paddle.fluid import core
from paddle.fluid.optimizer import AdamOptimizer
from paddle.fluid.framework import IrGraph
from paddle.fluid.contrib.slim.quantization import ImperativeQuantAware
from paddle.fluid.contrib.slim.quantization import QuantizationTransformPass
from paddle.fluid.dygraph.container import Sequential
from paddle.nn import Linear, Conv2D, Softmax
from paddle.fluid.dygraph.nn import Pool2D
from paddle.fluid.log_helper import get_logger
from paddle.fluid.dygraph.io import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX
from paddle.fluid.contrib.slim.quantization.imperative.quant_nn import QuantizedConv2D
from imperative_test_utils import fix_model_dict, ImperativeLenet
paddle.enable_static()
os.environ["CPU_NUM"] = "1"
......@@ -45,115 +45,6 @@ _logger = get_logger(
__name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s')
def StaticLenet(data, num_classes=10):
conv2d_w1_attr = fluid.ParamAttr(name="conv2d_w_1")
conv2d_w2_attr = fluid.ParamAttr(name="conv2d_w_2")
fc_w1_attr = fluid.ParamAttr(name="fc_w_1")
fc_w2_attr = fluid.ParamAttr(name="fc_w_2")
fc_w3_attr = fluid.ParamAttr(name="fc_w_3")
conv2d_b1_attr = fluid.ParamAttr(name="conv2d_b_1")
conv2d_b2_attr = fluid.ParamAttr(name="conv2d_b_2")
fc_b1_attr = fluid.ParamAttr(name="fc_b_1")
fc_b2_attr = fluid.ParamAttr(name="fc_b_2")
fc_b3_attr = fluid.ParamAttr(name="fc_b_3")
conv1 = fluid.layers.conv2d(
data,
num_filters=6,
filter_size=3,
stride=1,
padding=1,
param_attr=conv2d_w1_attr,
bias_attr=conv2d_b1_attr)
pool1 = fluid.layers.pool2d(
conv1, pool_size=2, pool_type='max', pool_stride=2)
conv2 = fluid.layers.conv2d(
pool1,
num_filters=16,
filter_size=5,
stride=1,
padding=0,
param_attr=conv2d_w2_attr,
bias_attr=conv2d_b2_attr)
pool2 = fluid.layers.pool2d(
conv2, pool_size=2, pool_type='max', pool_stride=2)
fc1 = fluid.layers.fc(input=pool2,
size=120,
param_attr=fc_w1_attr,
bias_attr=fc_b1_attr)
fc2 = fluid.layers.fc(input=fc1,
size=84,
param_attr=fc_w2_attr,
bias_attr=fc_b2_attr)
fc3 = fluid.layers.fc(input=fc2,
size=num_classes,
param_attr=fc_w3_attr,
bias_attr=fc_b3_attr)
fc4 = fluid.layers.softmax(fc3, use_cudnn=True)
return fc4
class ImperativeLenet(fluid.dygraph.Layer):
def __init__(self, num_classes=10):
super(ImperativeLenet, self).__init__()
conv2d_w1_attr = fluid.ParamAttr(name="conv2d_w_1")
conv2d_w2_attr = fluid.ParamAttr(name="conv2d_w_2")
fc_w1_attr = fluid.ParamAttr(name="fc_w_1")
fc_w2_attr = fluid.ParamAttr(name="fc_w_2")
fc_w3_attr = fluid.ParamAttr(name="fc_w_3")
conv2d_b1_attr = fluid.ParamAttr(name="conv2d_b_1")
conv2d_b2_attr = fluid.ParamAttr(name="conv2d_b_2")
fc_b1_attr = fluid.ParamAttr(name="fc_b_1")
fc_b2_attr = fluid.ParamAttr(name="fc_b_2")
fc_b3_attr = fluid.ParamAttr(name="fc_b_3")
self.features = Sequential(
Conv2D(
in_channels=1,
out_channels=6,
kernel_size=3,
stride=1,
padding=1,
weight_attr=conv2d_w1_attr,
bias_attr=conv2d_b1_attr),
Pool2D(
pool_size=2, pool_type='max', pool_stride=2),
Conv2D(
in_channels=6,
out_channels=16,
kernel_size=5,
stride=1,
padding=0,
weight_attr=conv2d_w2_attr,
bias_attr=conv2d_b2_attr),
Pool2D(
pool_size=2, pool_type='max', pool_stride=2))
self.fc = Sequential(
Linear(
in_features=400,
out_features=120,
weight_attr=fc_w1_attr,
bias_attr=fc_b1_attr),
Linear(
in_features=120,
out_features=84,
weight_attr=fc_w2_attr,
bias_attr=fc_b2_attr),
Linear(
in_features=84,
out_features=num_classes,
weight_attr=fc_w3_attr,
bias_attr=fc_b3_attr),
Softmax())
def forward(self, inputs):
x = self.features(inputs)
x = fluid.layers.flatten(x, 1)
x = self.fc(x)
return x
class TestImperativeQat(unittest.TestCase):
"""
QAT = quantization-aware training
......@@ -164,19 +55,26 @@ class TestImperativeQat(unittest.TestCase):
timestamp = time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime())
cls.root_path = os.path.join(os.getcwd(), "imperative_qat_" + timestamp)
cls.save_path = os.path.join(cls.root_path, "lenet")
cls.dynamic_root_path = os.path.join(os.getcwd(),
"dynamic_mnist_" + timestamp)
cls.dynamic_save_path = os.path.join(cls.dynamic_root_path, "model")
@classmethod
def tearDownClass(cls):
shutil.rmtree(cls.root_path)
shutil.rmtree(cls.dynamic_root_path)
try:
shutil.rmtree(cls.root_path)
except Exception as e:
print("Failed to delete {} due to {}".format(cls.root_path, str(e)))
def set_vars(self):
self.weight_quantize_type = None
self.activation_quantize_type = None
print('weight_quantize_type', self.weight_quantize_type)
def run_qat_save(self):
self.set_vars()
def test_qat_save(self):
imperative_qat = ImperativeQuantAware(
weight_quantize_type='abs_max',
activation_quantize_type='moving_average_abs_max')
weight_quantize_type=self.weight_quantize_type,
activation_quantize_type=self.activation_quantize_type)
with fluid.dygraph.guard():
# For CI coverage
conv1 = Conv2D(
......@@ -190,10 +88,17 @@ class TestImperativeQat(unittest.TestCase):
data = np.random.uniform(-1, 1, [10, 3, 32, 32]).astype('float32')
quant_conv1(fluid.dygraph.to_variable(data))
seed = 1
np.random.seed(seed)
fluid.default_main_program().random_seed = seed
fluid.default_startup_program().random_seed = seed
lenet = ImperativeLenet()
lenet = fix_model_dict(lenet)
imperative_qat.quantize(lenet)
adam = AdamOptimizer(
learning_rate=0.001, parameter_list=lenet.parameters())
train_reader = paddle.batch(
paddle.dataset.mnist.train(), batch_size=32, drop_last=True)
test_reader = paddle.batch(
......@@ -226,6 +131,7 @@ class TestImperativeQat(unittest.TestCase):
break
lenet.eval()
eval_acc_top1_list = []
for batch_id, data in enumerate(test_reader()):
x_data = np.array([x[0].reshape(1, 28, 28)
for x in data]).astype('float32')
......@@ -242,14 +148,19 @@ class TestImperativeQat(unittest.TestCase):
input=out, label=label, k=5)
if batch_id % 100 == 0:
eval_acc_top1_list.append(float(acc_top1.numpy()))
_logger.info(
"Test | At epoch {} step {}: acc1 = {:}, acc5 = {:}".
format(epoch, batch_id,
acc_top1.numpy(), acc_top5.numpy()))
# save weights
model_dict = lenet.state_dict()
fluid.save_dygraph(model_dict, "save_temp")
# check eval acc
eval_acc_top1 = sum(eval_acc_top1_list) / len(
eval_acc_top1_list)
print('eval_acc_top1', eval_acc_top1)
self.assertTrue(
eval_acc_top1 > 0.9,
msg="The test acc {%f} is less than 0.9." % eval_acc_top1)
# test the correctness of `paddle.jit.save`
data = next(test_reader())
......@@ -260,13 +171,14 @@ class TestImperativeQat(unittest.TestCase):
before_save = lenet(test_img)
# save inference quantized model
paddle.jit.save(
imperative_qat.save_quantized_model(
layer=lenet,
path=TestImperativeQat.save_path,
path=self.save_path,
input_spec=[
paddle.static.InputSpec(
shape=[None, 1, 28, 28], dtype='float32')
])
print('Quantized model saved in {%s}' % self.save_path)
if core.is_compiled_with_cuda():
place = core.CUDAPlace(0)
......@@ -275,183 +187,27 @@ class TestImperativeQat(unittest.TestCase):
exe = fluid.Executor(place)
[inference_program, feed_target_names,
fetch_targets] = fluid.io.load_inference_model(
dirname=TestImperativeQat.root_path,
dirname=self.root_path,
executor=exe,
model_filename="lenet" + INFER_MODEL_SUFFIX,
params_filename="lenet" + INFER_PARAMS_SUFFIX)
after_save, = exe.run(inference_program,
feed={feed_target_names[0]: test_data},
fetch_list=fetch_targets)
# check
self.assertTrue(
np.allclose(after_save, before_save.numpy()),
msg='Failed to save the inference quantized model.')
def test_qat_acc(self):
def _build_static_lenet(main, startup, is_test=False, seed=1000):
with fluid.unique_name.guard():
with fluid.program_guard(main, startup):
main.random_seed = seed
startup.random_seed = seed
img = fluid.layers.data(
name='image', shape=[1, 28, 28], dtype='float32')
label = fluid.layers.data(
name='label', shape=[1], dtype='int64')
prediction = StaticLenet(img)
if not is_test:
loss = fluid.layers.cross_entropy(
input=prediction, label=label)
avg_loss = fluid.layers.mean(loss)
else:
avg_loss = prediction
return img, label, avg_loss
reader = paddle.batch(
paddle.dataset.mnist.test(), batch_size=32, drop_last=True)
weight_quantize_type = 'abs_max'
activation_quant_type = 'moving_average_abs_max'
param_init_map = {}
seed = 1000
lr = 0.01
# imperative train
_logger.info(
"--------------------------dynamic graph qat--------------------------"
)
imperative_qat = ImperativeQuantAware(
weight_quantize_type=weight_quantize_type,
activation_quantize_type=activation_quant_type)
with fluid.dygraph.guard():
np.random.seed(seed)
fluid.default_main_program().random_seed = seed
fluid.default_startup_program().random_seed = seed
lenet = ImperativeLenet()
fixed_state = {}
for name, param in lenet.named_parameters():
p_shape = param.numpy().shape
p_value = param.numpy()
if name.endswith("bias"):
value = np.zeros_like(p_value).astype('float32')
else:
value = np.random.normal(
loc=0.0, scale=0.01, size=np.product(p_shape)).reshape(
p_shape).astype('float32')
fixed_state[name] = value
param_init_map[param.name] = value
lenet.set_dict(fixed_state)
class TestImperativeQatAbsMax(TestImperativeQat):
def set_vars(self):
self.weight_quantize_type = 'abs_max'
self.activation_quantize_type = 'moving_average_abs_max'
print('weight_quantize_type', self.weight_quantize_type)
imperative_qat.quantize(lenet)
adam = AdamOptimizer(
learning_rate=lr, parameter_list=lenet.parameters())
dynamic_loss_rec = []
lenet.train()
for batch_id, data in enumerate(reader()):
x_data = np.array([x[0].reshape(1, 28, 28)
for x in data]).astype('float32')
y_data = np.array(
[x[1] for x in data]).astype('int64').reshape(-1, 1)
img = fluid.dygraph.to_variable(x_data)
label = fluid.dygraph.to_variable(y_data)
out = lenet(img)
loss = fluid.layers.cross_entropy(out, label)
avg_loss = fluid.layers.mean(loss)
avg_loss.backward()
adam.minimize(avg_loss)
lenet.clear_gradients()
dynamic_loss_rec.append(avg_loss.numpy()[0])
if batch_id % 100 == 0:
_logger.info('{}: {}'.format('loss', avg_loss.numpy()))
paddle.jit.save(
layer=lenet,
path=TestImperativeQat.dynamic_save_path,
input_spec=[
paddle.static.InputSpec(
shape=[None, 1, 28, 28], dtype='float32')
])
# static graph train
_logger.info(
"--------------------------static graph qat--------------------------"
)
static_loss_rec = []
if core.is_compiled_with_cuda():
place = core.CUDAPlace(0)
else:
place = core.CPUPlace()
exe = fluid.Executor(place)
main = fluid.Program()
infer = fluid.Program()
startup = fluid.Program()
static_img, static_label, static_loss = _build_static_lenet(
main, startup, False, seed)
infer_img, _, infer_pre = _build_static_lenet(infer, startup, True,
seed)
with fluid.unique_name.guard():
with fluid.program_guard(main, startup):
opt = AdamOptimizer(learning_rate=lr)
opt.minimize(static_loss)
scope = core.Scope()
with fluid.scope_guard(scope):
exe.run(startup)
for param in main.all_parameters():
param_tensor = scope.var(param.name).get_tensor()
param_tensor.set(param_init_map[param.name], place)
main_graph = IrGraph(core.Graph(main.desc), for_test=False)
infer_graph = IrGraph(core.Graph(infer.desc), for_test=True)
transform_pass = QuantizationTransformPass(
scope=scope,
place=place,
activation_quantize_type=activation_quant_type,
weight_quantize_type=weight_quantize_type,
quantizable_op_type=['conv2d', 'depthwise_conv2d', 'mul'])
transform_pass.apply(main_graph)
transform_pass.apply(infer_graph)
build_strategy = fluid.BuildStrategy()
build_strategy.fuse_all_reduce_ops = False
binary = fluid.CompiledProgram(main_graph.graph).with_data_parallel(
loss_name=static_loss.name, build_strategy=build_strategy)
feeder = fluid.DataFeeder(
feed_list=[static_img, static_label], place=place)
with fluid.scope_guard(scope):
for batch_id, data in enumerate(reader()):
loss_v, = exe.run(binary,
feed=feeder.feed(data),
fetch_list=[static_loss])
static_loss_rec.append(loss_v[0])
if batch_id % 100 == 0:
_logger.info('{}: {}'.format('loss', loss_v))
save_program = infer_graph.to_program()
with fluid.scope_guard(scope):
fluid.io.save_inference_model("./static_mnist", [infer_img.name],
[infer_pre], exe, save_program)
rtol = 1e-05
atol = 1e-08
for i, (loss_d,
loss_s) in enumerate(zip(dynamic_loss_rec, static_loss_rec)):
diff = np.abs(loss_d - loss_s)
if diff > (atol + rtol * np.abs(loss_s)):
_logger.info(
"diff({}) at {}, dynamic loss = {}, static loss = {}".
format(diff, i, loss_d, loss_s))
break
self.assertTrue(
np.allclose(
np.array(dynamic_loss_rec),
np.array(static_loss_rec),
rtol=rtol,
atol=atol,
equal_nan=True),
msg='Failed to do the imperative qat.')
def test_qat(self):
self.run_qat_save()
if __name__ == '__main__':
......
# copyright (c) 2018 paddlepaddle authors. all rights reserved.
#
# licensed under the apache license, version 2.0 (the "license");
# you may not use this file except in compliance with the license.
# you may obtain a copy of the license at
#
# http://www.apache.org/licenses/license-2.0
#
# unless required by applicable law or agreed to in writing, software
# distributed under the license is distributed on an "as is" basis,
# without warranties or conditions of any kind, either express or implied.
# see the license for the specific language governing permissions and
# limitations under the license.
from __future__ import print_function
import os
import numpy as np
import random
import shutil
import time
import unittest
import logging
import paddle
import six
import paddle.fluid as fluid
from paddle.nn import functional
from paddle.nn import Linear, Conv2D, Softmax, BatchNorm
from paddle.fluid.layers import nn
from paddle.fluid import core
from paddle.fluid.layer_helper import LayerHelper
from paddle.fluid.optimizer import AdamOptimizer
from paddle.fluid.framework import IrGraph
from paddle.fluid.contrib.slim.quantization import ImperativeQuantAware, QuantizationTransformPass, AddQuantDequantPass
from paddle.fluid.dygraph.container import Sequential
from paddle.fluid.dygraph.nn import Pool2D
from paddle.nn.layer.activation import ReLU, LeakyReLU, ReLU6, Tanh, Swish
from paddle.fluid.log_helper import get_logger
from paddle.fluid.dygraph.io import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX
paddle.enable_static()
os.environ["CPU_NUM"] = "1"
if core.is_compiled_with_cuda():
fluid.set_flags({"FLAGS_cudnn_deterministic": True})
_logger = get_logger(
__name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s')
def StaticLenet(data, num_classes=10):
conv2d_w1_attr = fluid.ParamAttr(name="conv2d_w_1")
conv2d_w2_attr = fluid.ParamAttr(name="conv2d_w_2")
conv2d_w3_attr = fluid.ParamAttr(name="conv2d_w_3")
fc_w1_attr = fluid.ParamAttr(name="fc_w_1")
fc_w2_attr = fluid.ParamAttr(name="fc_w_2")
fc_w3_attr = fluid.ParamAttr(name="fc_w_3")
conv2d_b1_attr = fluid.ParamAttr(name="conv2d_b_1")
conv2d_b2_attr = fluid.ParamAttr(name="conv2d_b_2")
conv2d_b3_attr = fluid.ParamAttr(name="conv2d_b_3")
fc_b1_attr = fluid.ParamAttr(name="fc_b_1")
fc_b2_attr = fluid.ParamAttr(name="fc_b_2")
fc_b3_attr = fluid.ParamAttr(name="fc_b_3")
conv1 = fluid.layers.conv2d(
data,
num_filters=6,
filter_size=3,
stride=1,
padding=1,
param_attr=conv2d_w1_attr,
bias_attr=conv2d_b1_attr)
conv1 = fluid.layers.leaky_relu(conv1, alpha=0.02)
pool1 = fluid.layers.pool2d(
conv1, pool_size=2, pool_type='max', pool_stride=2)
conv2 = fluid.layers.conv2d(
pool1,
num_filters=16,
filter_size=5,
stride=1,
padding=0,
param_attr=conv2d_w2_attr,
bias_attr=conv2d_b2_attr)
pool2 = fluid.layers.pool2d(
conv2, pool_size=2, pool_type='max', pool_stride=2)
pool2 = fluid.layers.relu(pool2)
pool2 = fluid.layers.swish(pool2)
conv3 = fluid.layers.conv2d(
pool2,
num_filters=16,
filter_size=1,
stride=1,
padding=0,
param_attr=conv2d_w3_attr,
bias_attr=conv2d_b3_attr)
conv3 = fluid.layers.relu6(conv3)
conv3 = paddle.tensor.math.tanh(conv3)
fc1 = fluid.layers.fc(input=conv3,
size=120,
param_attr=fc_w1_attr,
bias_attr=fc_b1_attr)
fc2 = fluid.layers.fc(input=fc1,
size=84,
param_attr=fc_w2_attr,
bias_attr=fc_b2_attr)
fc3 = fluid.layers.fc(input=fc2,
size=num_classes,
param_attr=fc_w3_attr,
bias_attr=fc_b3_attr)
fc3 = fluid.layers.softmax(fc3, use_cudnn=True)
return fc3
class ImperativeLenet(fluid.dygraph.Layer):
def __init__(self, num_classes=10):
super(ImperativeLenet, self).__init__()
conv2d_w1_attr = fluid.ParamAttr(name="conv2d_w_1")
conv2d_w2_attr = fluid.ParamAttr(name="conv2d_w_2")
conv2d_w3_attr = fluid.ParamAttr(name="conv2d_w_3")
fc_w1_attr = fluid.ParamAttr(name="fc_w_1")
fc_w2_attr = fluid.ParamAttr(name="fc_w_2")
fc_w3_attr = fluid.ParamAttr(name="fc_w_3")
conv2d_b1_attr = fluid.ParamAttr(name="conv2d_b_1")
conv2d_b2_attr = fluid.ParamAttr(name="conv2d_b_2")
conv2d_b3_attr = fluid.ParamAttr(name="conv2d_b_3")
fc_b1_attr = fluid.ParamAttr(name="fc_b_1")
fc_b2_attr = fluid.ParamAttr(name="fc_b_2")
fc_b3_attr = fluid.ParamAttr(name="fc_b_3")
self.features = Sequential(
Conv2D(
in_channels=1,
out_channels=6,
kernel_size=3,
stride=1,
padding=1,
weight_attr=conv2d_w1_attr,
bias_attr=conv2d_b1_attr),
LeakyReLU(negative_slope=0.02),
Pool2D(
pool_size=2, pool_type='max', pool_stride=2),
Conv2D(
in_channels=6,
out_channels=16,
kernel_size=5,
stride=1,
padding=0,
weight_attr=conv2d_w2_attr,
bias_attr=conv2d_b2_attr),
Pool2D(
pool_size=2, pool_type='max', pool_stride=2),
ReLU(),
Swish(),
Conv2D(
in_channels=16,
out_channels=16,
kernel_size=1,
stride=1,
padding=0,
weight_attr=conv2d_w3_attr,
bias_attr=conv2d_b3_attr),
ReLU6(),
Tanh())
self.fc = Sequential(
Linear(
in_features=400,
out_features=120,
weight_attr=fc_w1_attr,
bias_attr=fc_b1_attr),
Linear(
in_features=120,
out_features=84,
weight_attr=fc_w2_attr,
bias_attr=fc_b2_attr),
Linear(
in_features=84,
out_features=num_classes,
weight_attr=fc_w3_attr,
bias_attr=fc_b3_attr),
Softmax())
def forward(self, inputs):
x = self.features(inputs)
x = fluid.layers.flatten(x, 1)
x = self.fc(x)
return x
class TestImperativeAddQuantDequant(unittest.TestCase):
@classmethod
def setUpClass(cls):
timestamp = time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime())
cls.root_path = os.path.join(os.getcwd(),
"imperative_qat_aqd_" + timestamp)
cls.save_path = os.path.join(cls.root_path, "lenet")
cls.dynamic_root_path = os.path.join(os.getcwd(),
"dynamic_mnist_aqd_" + timestamp)
cls.dynamic_save_path = os.path.join(cls.dynamic_root_path, "model")
@classmethod
def tearDownClass(cls):
shutil.rmtree(cls.root_path)
shutil.rmtree(cls.dynamic_root_path)
def test_qat_save(self):
imperative_qat = ImperativeQuantAware(
weight_quantize_type='abs_max',
activation_quantize_type='moving_average_abs_max',
quantizable_layer_type=[
'Conv2D', 'Linear', 'ReLU', 'LeakyReLU', 'ReLU6', 'Tanh',
'Swish'
])
with fluid.dygraph.guard():
lenet = ImperativeLenet()
imperative_qat.quantize(lenet)
adam = AdamOptimizer(
learning_rate=0.001, parameter_list=lenet.parameters())
train_reader = paddle.batch(
paddle.dataset.mnist.train(), batch_size=32, drop_last=True)
test_reader = paddle.batch(
paddle.dataset.mnist.test(), batch_size=32)
epoch_num = 1
for epoch in range(epoch_num):
lenet.train()
for batch_id, data in enumerate(train_reader()):
x_data = np.array([x[0].reshape(1, 28, 28)
for x in data]).astype('float32')
y_data = np.array(
[x[1] for x in data]).astype('int64').reshape(-1, 1)
img = fluid.dygraph.to_variable(x_data)
label = fluid.dygraph.to_variable(y_data)
out = lenet(img)
acc = fluid.layers.accuracy(out, label)
loss = fluid.layers.cross_entropy(out, label)
avg_loss = fluid.layers.mean(loss)
avg_loss.backward()
adam.minimize(avg_loss)
lenet.clear_gradients()
if batch_id % 100 == 0:
_logger.info(
"Train | At epoch {} step {}: loss = {:}, acc= {:}".
format(epoch, batch_id,
avg_loss.numpy(), acc.numpy()))
if batch_id == 500: # For shortening CI time
break
lenet.eval()
for batch_id, data in enumerate(test_reader()):
x_data = np.array([x[0].reshape(1, 28, 28)
for x in data]).astype('float32')
y_data = np.array(
[x[1] for x in data]).astype('int64').reshape(-1, 1)
img = fluid.dygraph.to_variable(x_data)
label = fluid.dygraph.to_variable(y_data)
out = lenet(img)
acc_top1 = fluid.layers.accuracy(
input=out, label=label, k=1)
acc_top5 = fluid.layers.accuracy(
input=out, label=label, k=5)
if batch_id % 100 == 0:
_logger.info(
"Test | At epoch {} step {}: acc1 = {:}, acc5 = {:}".
format(epoch, batch_id,
acc_top1.numpy(), acc_top5.numpy()))
# save weights
model_dict = lenet.state_dict()
fluid.save_dygraph(model_dict, "save_temp")
# test the correctness of `paddle.jit.save`
data = next(test_reader())
test_data = np.array([x[0].reshape(1, 28, 28)
for x in data]).astype('float32')
test_img = fluid.dygraph.to_variable(test_data)
lenet.eval()
before_save = lenet(test_img)
# save inference quantized model
paddle.jit.save(
layer=lenet,
path=TestImperativeAddQuantDequant.save_path,
input_spec=[
paddle.static.InputSpec(
shape=[None, 1, 28, 28], dtype='float32')
])
if core.is_compiled_with_cuda():
place = core.CUDAPlace(0)
else:
place = core.CPUPlace()
exe = fluid.Executor(place)
[inference_program, feed_target_names,
fetch_targets] = fluid.io.load_inference_model(
dirname=TestImperativeAddQuantDequant.root_path,
executor=exe,
model_filename="lenet" + INFER_MODEL_SUFFIX,
params_filename="lenet" + INFER_PARAMS_SUFFIX)
after_save, = exe.run(inference_program,
feed={feed_target_names[0]: test_data},
fetch_list=fetch_targets)
self.assertTrue(
np.allclose(after_save, before_save.numpy()),
msg='Failed to save the inference quantized model.')
def test_qat_acc(self):
def _build_static_lenet(main, startup, is_test=False, seed=1000):
with fluid.unique_name.guard():
with fluid.program_guard(main, startup):
main.random_seed = seed
startup.random_seed = seed
img = fluid.layers.data(
name='image', shape=[1, 28, 28], dtype='float32')
label = fluid.layers.data(
name='label', shape=[1], dtype='int64')
prediction = StaticLenet(img)
if not is_test:
loss = fluid.layers.cross_entropy(
input=prediction, label=label)
avg_loss = fluid.layers.mean(loss)
else:
avg_loss = prediction
return img, label, avg_loss
reader = paddle.batch(
paddle.dataset.mnist.test(), batch_size=32, drop_last=True)
weight_quantize_type = 'abs_max'
activation_quant_type = 'moving_average_abs_max'
param_init_map = {}
seed = 1000
lr = 0.001
# imperative train
_logger.info(
"--------------------------dynamic graph qat--------------------------"
)
imperative_qat = ImperativeQuantAware(
weight_quantize_type=weight_quantize_type,
activation_quantize_type=activation_quant_type,
quantizable_layer_type=[
'Conv2D', 'Linear', 'ReLU', 'LeakyReLU', 'ReLU6', 'Tanh',
'Swish'
])
with fluid.dygraph.guard():
np.random.seed(seed)
fluid.default_main_program().random_seed = seed
fluid.default_startup_program().random_seed = seed
lenet = ImperativeLenet()
fixed_state = {}
for name, param in lenet.named_parameters():
p_shape = param.numpy().shape
p_value = param.numpy()
if name.endswith("bias"):
value = np.zeros_like(p_value).astype('float32')
else:
value = np.random.normal(
loc=0.0, scale=0.01, size=np.product(p_shape)).reshape(
p_shape).astype('float32')
fixed_state[name] = value
param_init_map[param.name] = value
lenet.set_dict(fixed_state)
imperative_qat.quantize(lenet)
adam = AdamOptimizer(
learning_rate=lr, parameter_list=lenet.parameters())
dynamic_loss_rec = []
lenet.train()
for batch_id, data in enumerate(reader()):
x_data = np.array([x[0].reshape(1, 28, 28)
for x in data]).astype('float32')
y_data = np.array(
[x[1] for x in data]).astype('int64').reshape(-1, 1)
img = fluid.dygraph.to_variable(x_data)
label = fluid.dygraph.to_variable(y_data)
out = lenet(img)
loss = fluid.layers.cross_entropy(out, label)
avg_loss = fluid.layers.mean(loss)
avg_loss.backward()
adam.minimize(avg_loss)
lenet.clear_gradients()
dynamic_loss_rec.append(avg_loss.numpy()[0])
if batch_id % 100 == 0:
_logger.info('{}: {}'.format('loss', avg_loss.numpy()))
if batch_id > 500:
break
lenet.eval()
paddle.jit.save(
layer=lenet,
path=TestImperativeAddQuantDequant.dynamic_save_path,
input_spec=[
paddle.static.InputSpec(
shape=[None, 1, 28, 28], dtype='float32')
])
# static graph train
_logger.info(
"--------------------------static graph qat--------------------------"
)
static_loss_rec = []
if core.is_compiled_with_cuda():
place = core.CUDAPlace(0)
else:
place = core.CPUPlace()
exe = fluid.Executor(place)
main = fluid.Program()
infer = fluid.Program()
startup = fluid.Program()
static_img, static_label, static_loss = _build_static_lenet(
main, startup, False, seed)
infer_img, _, infer_pre = _build_static_lenet(infer, startup, True,
seed)
with fluid.unique_name.guard():
with fluid.program_guard(main, startup):
opt = AdamOptimizer(learning_rate=lr)
opt.minimize(static_loss)
scope = core.Scope()
with fluid.scope_guard(scope):
exe.run(startup)
for param in main.all_parameters():
param_tensor = scope.var(param.name).get_tensor()
param_tensor.set(param_init_map[param.name], place)
main_graph = IrGraph(core.Graph(main.desc), for_test=False)
infer_graph = IrGraph(core.Graph(infer.desc), for_test=True)
transform_pass = QuantizationTransformPass(
scope=scope,
place=place,
activation_quantize_type=activation_quant_type,
weight_quantize_type=weight_quantize_type,
quantizable_op_type=['conv2d', 'depthwise_conv2d', 'mul'])
add_quant_dequant_pass = AddQuantDequantPass(
scope=scope,
place=place,
quantizable_op_type=[
'relu', 'leaky_relu', 'relu6', 'tanh', 'swish'
])
transform_pass.apply(main_graph)
transform_pass.apply(infer_graph)
add_quant_dequant_pass.apply(main_graph)
add_quant_dequant_pass.apply(infer_graph)
build_strategy = fluid.BuildStrategy()
build_strategy.fuse_all_reduce_ops = False
binary = fluid.CompiledProgram(main_graph.graph).with_data_parallel(
loss_name=static_loss.name, build_strategy=build_strategy)
feeder = fluid.DataFeeder(
feed_list=[static_img, static_label], place=place)
with fluid.scope_guard(scope):
for batch_id, data in enumerate(reader()):
loss_v, = exe.run(binary,
feed=feeder.feed(data),
fetch_list=[static_loss])
static_loss_rec.append(loss_v[0])
if batch_id % 100 == 0:
_logger.info('{}: {}'.format('loss', loss_v))
save_program = infer_graph.to_program()
with fluid.scope_guard(scope):
fluid.io.save_inference_model("./static_mnist", [infer_img.name],
[infer_pre], exe, save_program)
rtol = 1e-08
atol = 1e-10
for i, (loss_d,
loss_s) in enumerate(zip(dynamic_loss_rec, static_loss_rec)):
diff = np.abs(loss_d - loss_s)
if diff > (atol + rtol * np.abs(loss_s)):
_logger.info(
"diff({}) at {}, dynamic loss = {}, static loss = {}".
format(diff, i, loss_d, loss_s))
break
self.assertTrue(
np.allclose(
np.array(dynamic_loss_rec),
np.array(static_loss_rec),
rtol=rtol,
atol=atol,
equal_nan=True),
msg='Failed to do the imperative qat.')
if __name__ == '__main__':
unittest.main()
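Side note on the accuracy assertion in test_qat_acc above: the manual per-batch diff logging uses the same element-wise criterion that np.allclose applies internally, so a failing assertion is always preceded by a log entry pointing at the first offending batch. A minimal, standalone sketch of that equivalence (the loss arrays below are made-up placeholders, not values produced by this test):
import numpy as np
# Hypothetical loss records standing in for dynamic_loss_rec / static_loss_rec.
loss_dynamic = np.array([2.31, 1.87, 1.42], dtype=np.float32)
loss_static = np.array([2.31, 1.87, 1.42], dtype=np.float32)
rtol, atol = 1e-08, 1e-10
# np.allclose(a, b, rtol, atol) checks |a - b| <= atol + rtol * |b| element-wise,
# which is exactly the per-batch condition logged before the assertTrue above.
manual_ok = bool(np.all(np.abs(loss_dynamic - loss_static)
                        <= atol + rtol * np.abs(loss_static)))
assert manual_ok == np.allclose(loss_dynamic, loss_static, rtol=rtol, atol=atol)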
......@@ -19,18 +19,13 @@ import numpy as np
import random
import unittest
import logging
import paddle
import paddle.fluid as fluid
from paddle.fluid import core
from paddle.fluid.optimizer import AdamOptimizer
from paddle.fluid.framework import IrGraph
from paddle.fluid.contrib.slim.quantization import ImperativeQuantAware
from paddle.fluid.contrib.slim.quantization import QuantizationTransformPass
from paddle.fluid.dygraph.container import Sequential
from paddle.nn import Linear, Conv2D, Softmax
from paddle.fluid.dygraph.nn import Pool2D
from paddle.fluid.log_helper import get_logger
from paddle.fluid.dygraph.io import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX
from test_imperative_qat import TestImperativeQat
paddle.enable_static()
......@@ -42,388 +37,14 @@ _logger = get_logger(
__name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s')
def StaticLenet(data, num_classes=10):
conv2d_w1_attr = fluid.ParamAttr(name="conv2d_w_1")
conv2d_w2_attr = fluid.ParamAttr(name="conv2d_w_2")
fc_w1_attr = fluid.ParamAttr(name="fc_w_1")
fc_w2_attr = fluid.ParamAttr(name="fc_w_2")
fc_w3_attr = fluid.ParamAttr(name="fc_w_3")
conv2d_b1_attr = fluid.ParamAttr(name="conv2d_b_1")
conv2d_b2_attr = fluid.ParamAttr(name="conv2d_b_2")
fc_b1_attr = fluid.ParamAttr(name="fc_b_1")
fc_b2_attr = fluid.ParamAttr(name="fc_b_2")
fc_b3_attr = fluid.ParamAttr(name="fc_b_3")
conv1 = fluid.layers.conv2d(
data,
num_filters=6,
filter_size=3,
stride=1,
padding=1,
param_attr=conv2d_w1_attr,
bias_attr=conv2d_b1_attr)
pool1 = fluid.layers.pool2d(
conv1, pool_size=2, pool_type='max', pool_stride=2)
conv2 = fluid.layers.conv2d(
pool1,
num_filters=16,
filter_size=5,
stride=1,
padding=0,
param_attr=conv2d_w2_attr,
bias_attr=conv2d_b2_attr)
pool2 = fluid.layers.pool2d(
conv2, pool_size=2, pool_type='max', pool_stride=2)
fc1 = fluid.layers.fc(input=pool2,
size=120,
param_attr=fc_w1_attr,
bias_attr=fc_b1_attr)
fc2 = fluid.layers.fc(input=fc1,
size=84,
param_attr=fc_w2_attr,
bias_attr=fc_b2_attr)
fc3 = fluid.layers.fc(input=fc2,
size=num_classes,
param_attr=fc_w3_attr,
bias_attr=fc_b3_attr)
fc3 = fluid.layers.softmax(fc3, use_cudnn=True)
return fc3
class ImperativeLenet(fluid.dygraph.Layer):
def __init__(self, num_classes=10):
super(ImperativeLenet, self).__init__()
conv2d_w1_attr = fluid.ParamAttr(name="conv2d_w_1")
conv2d_w2_attr = fluid.ParamAttr(name="conv2d_w_2")
fc_w1_attr = fluid.ParamAttr(name="fc_w_1")
fc_w2_attr = fluid.ParamAttr(name="fc_w_2")
fc_w3_attr = fluid.ParamAttr(name="fc_w_3")
conv2d_b1_attr = fluid.ParamAttr(name="conv2d_b_1")
conv2d_b2_attr = fluid.ParamAttr(name="conv2d_b_2")
fc_b1_attr = fluid.ParamAttr(name="fc_b_1")
fc_b2_attr = fluid.ParamAttr(name="fc_b_2")
fc_b3_attr = fluid.ParamAttr(name="fc_b_3")
self.features = Sequential(
Conv2D(
in_channels=1,
out_channels=6,
kernel_size=3,
stride=1,
padding=1,
weight_attr=conv2d_w1_attr,
bias_attr=conv2d_b1_attr),
Pool2D(
pool_size=2, pool_type='max', pool_stride=2),
Conv2D(
in_channels=6,
out_channels=16,
kernel_size=5,
stride=1,
padding=0,
weight_attr=conv2d_w2_attr,
bias_attr=conv2d_b2_attr),
Pool2D(
pool_size=2, pool_type='max', pool_stride=2))
self.fc = Sequential(
Linear(
in_features=400,
out_features=120,
weight_attr=fc_w1_attr,
bias_attr=fc_b1_attr),
Linear(
in_features=120,
out_features=84,
weight_attr=fc_w2_attr,
bias_attr=fc_b2_attr),
Linear(
in_features=84,
out_features=num_classes,
weight_attr=fc_w3_attr,
bias_attr=fc_b3_attr),
Softmax())
def forward(self, inputs):
x = self.features(inputs)
x = fluid.layers.flatten(x, 1)
x = self.fc(x)
return x
class TestImperativeQatChannelWise(unittest.TestCase):
"""
QAT = quantization-aware training
"""
def test_qat_save(self):
imperative_qat = ImperativeQuantAware(
weight_quantize_type='channel_wise_abs_max',
activation_quantize_type='moving_average_abs_max')
with fluid.dygraph.guard():
lenet = ImperativeLenet()
imperative_qat.quantize(lenet)
adam = AdamOptimizer(
learning_rate=0.001, parameter_list=lenet.parameters())
train_reader = paddle.batch(
paddle.dataset.mnist.train(), batch_size=32, drop_last=True)
test_reader = paddle.batch(
paddle.dataset.mnist.test(), batch_size=32)
epoch_num = 1
for epoch in range(epoch_num):
lenet.train()
for batch_id, data in enumerate(train_reader()):
x_data = np.array([x[0].reshape(1, 28, 28)
for x in data]).astype('float32')
y_data = np.array(
[x[1] for x in data]).astype('int64').reshape(-1, 1)
img = fluid.dygraph.to_variable(x_data)
label = fluid.dygraph.to_variable(y_data)
out = lenet(img)
acc = fluid.layers.accuracy(out, label)
loss = fluid.layers.cross_entropy(out, label)
avg_loss = fluid.layers.mean(loss)
avg_loss.backward()
adam.minimize(avg_loss)
lenet.clear_gradients()
if batch_id % 100 == 0:
_logger.info(
"Train | At epoch {} step {}: loss = {:}, acc= {:}".
format(epoch, batch_id,
avg_loss.numpy(), acc.numpy()))
lenet.eval()
for batch_id, data in enumerate(test_reader()):
x_data = np.array([x[0].reshape(1, 28, 28)
for x in data]).astype('float32')
y_data = np.array(
[x[1] for x in data]).astype('int64').reshape(-1, 1)
img = fluid.dygraph.to_variable(x_data)
label = fluid.dygraph.to_variable(y_data)
out = lenet(img)
acc_top1 = fluid.layers.accuracy(
input=out, label=label, k=1)
acc_top5 = fluid.layers.accuracy(
input=out, label=label, k=5)
if batch_id % 100 == 0:
_logger.info(
"Test | At epoch {} step {}: acc1 = {:}, acc5 = {:}".
format(epoch, batch_id,
acc_top1.numpy(), acc_top5.numpy()))
# save weights
model_dict = lenet.state_dict()
fluid.save_dygraph(model_dict, "save_temp")
# test the correctness of `paddle.jit.save`
data = next(test_reader())
test_data = np.array([x[0].reshape(1, 28, 28)
for x in data]).astype('float32')
test_img = fluid.dygraph.to_variable(test_data)
lenet.eval()
before_save = lenet(test_img)
# save inference quantized model
path = "./qat_infer_model/mnist"
save_dir = "./qat_infer_model"
paddle.jit.save(
layer=lenet,
path=path,
input_spec=[
paddle.static.InputSpec(
shape=[None, 1, 28, 28], dtype='float32')
])
if core.is_compiled_with_cuda():
place = core.CUDAPlace(0)
else:
place = core.CPUPlace()
exe = fluid.Executor(place)
[inference_program, feed_target_names,
fetch_targets] = fluid.io.load_inference_model(
dirname=save_dir,
executor=exe,
model_filename="mnist" + INFER_MODEL_SUFFIX,
params_filename="mnist" + INFER_PARAMS_SUFFIX)
after_save, = exe.run(inference_program,
feed={feed_target_names[0]: test_data},
fetch_list=fetch_targets)
self.assertTrue(
np.allclose(after_save, before_save.numpy()),
msg='Failed to save the inference quantized model.')
def test_qat_acc(self):
def _build_static_lenet(main, startup, is_test=False, seed=1000):
with fluid.unique_name.guard():
with fluid.program_guard(main, startup):
main.random_seed = seed
startup.random_seed = seed
img = fluid.layers.data(
name='image', shape=[1, 28, 28], dtype='float32')
label = fluid.layers.data(
name='label', shape=[1], dtype='int64')
prediction = StaticLenet(img)
if not is_test:
loss = fluid.layers.cross_entropy(
input=prediction, label=label)
avg_loss = fluid.layers.mean(loss)
else:
avg_loss = prediction
return img, label, avg_loss
reader = paddle.batch(
paddle.dataset.mnist.test(), batch_size=32, drop_last=True)
weight_quantize_type = 'channel_wise_abs_max'
activation_quant_type = 'moving_average_abs_max'
param_init_map = {}
seed = 1000
lr = 0.001
# imperative train
_logger.info(
"--------------------------dynamic graph qat--------------------------"
)
imperative_qat = ImperativeQuantAware(
weight_quantize_type=weight_quantize_type,
activation_quantize_type=activation_quant_type)
with fluid.dygraph.guard():
np.random.seed(seed)
fluid.default_main_program().random_seed = seed
fluid.default_startup_program().random_seed = seed
lenet = ImperativeLenet()
fixed_state = {}
for name, param in lenet.named_parameters():
p_shape = param.numpy().shape
p_value = param.numpy()
if name.endswith("bias"):
value = np.zeros_like(p_value).astype('float32')
else:
value = np.random.normal(
loc=0.0, scale=0.01, size=np.product(p_shape)).reshape(
p_shape).astype('float32')
fixed_state[name] = value
param_init_map[param.name] = value
lenet.set_dict(fixed_state)
imperative_qat.quantize(lenet)
adam = AdamOptimizer(
learning_rate=lr, parameter_list=lenet.parameters())
dynamic_loss_rec = []
lenet.train()
for batch_id, data in enumerate(reader()):
x_data = np.array([x[0].reshape(1, 28, 28)
for x in data]).astype('float32')
y_data = np.array(
[x[1] for x in data]).astype('int64').reshape(-1, 1)
img = fluid.dygraph.to_variable(x_data)
label = fluid.dygraph.to_variable(y_data)
out = lenet(img)
loss = fluid.layers.cross_entropy(out, label)
avg_loss = fluid.layers.mean(loss)
avg_loss.backward()
adam.minimize(avg_loss)
lenet.clear_gradients()
dynamic_loss_rec.append(avg_loss.numpy()[0])
if batch_id % 100 == 0:
_logger.info('{}: {}'.format('loss', avg_loss.numpy()))
paddle.jit.save(
layer=lenet,
path="./dynamic_mnist/model",
input_spec=[
paddle.static.InputSpec(
shape=[None, 1, 28, 28], dtype='float32')
])
# static graph train
_logger.info(
"--------------------------static graph qat--------------------------"
)
static_loss_rec = []
if core.is_compiled_with_cuda():
place = core.CUDAPlace(0)
else:
place = core.CPUPlace()
exe = fluid.Executor(place)
main = fluid.Program()
infer = fluid.Program()
startup = fluid.Program()
static_img, static_label, static_loss = _build_static_lenet(
main, startup, False, seed)
infer_img, _, infer_pre = _build_static_lenet(infer, startup, True,
seed)
with fluid.unique_name.guard():
with fluid.program_guard(main, startup):
opt = AdamOptimizer(learning_rate=lr)
opt.minimize(static_loss)
scope = core.Scope()
with fluid.scope_guard(scope):
exe.run(startup)
for param in main.all_parameters():
param_tensor = scope.var(param.name).get_tensor()
param_tensor.set(param_init_map[param.name], place)
main_graph = IrGraph(core.Graph(main.desc), for_test=False)
infer_graph = IrGraph(core.Graph(infer.desc), for_test=True)
transform_pass = QuantizationTransformPass(
scope=scope,
place=place,
activation_quantize_type=activation_quant_type,
weight_quantize_type=weight_quantize_type,
quantizable_op_type=['conv2d', 'depthwise_conv2d', 'mul'])
transform_pass.apply(main_graph)
transform_pass.apply(infer_graph)
build_strategy = fluid.BuildStrategy()
build_strategy.fuse_all_reduce_ops = False
binary = fluid.CompiledProgram(main_graph.graph).with_data_parallel(
loss_name=static_loss.name, build_strategy=build_strategy)
feeder = fluid.DataFeeder(
feed_list=[static_img, static_label], place=place)
with fluid.scope_guard(scope):
for batch_id, data in enumerate(reader()):
loss_v, = exe.run(binary,
feed=feeder.feed(data),
fetch_list=[static_loss])
static_loss_rec.append(loss_v[0])
if batch_id % 100 == 0:
_logger.info('{}: {}'.format('loss', loss_v))
save_program = infer_graph.to_program()
with fluid.scope_guard(scope):
fluid.io.save_inference_model("./static_mnist", [infer_img.name],
[infer_pre], exe, save_program)
rtol = 1e-05
atol = 1e-08
for i, (loss_d,
loss_s) in enumerate(zip(dynamic_loss_rec, static_loss_rec)):
diff = np.abs(loss_d - loss_s)
if diff > (atol + rtol * np.abs(loss_s)):
_logger.info(
"diff({}) at {}, dynamic loss = {}, static loss = {}".
format(diff, i, loss_d, loss_s))
break
class TestImperativeQatChannelWise(TestImperativeQat):
def set_vars(self):
self.weight_quantize_type = 'channel_wise_abs_max'
self.activation_quantize_type = 'moving_average_abs_max'
print('weight_quantize_type', self.weight_quantize_type)
self.assertTrue(
np.allclose(
np.array(dynamic_loss_rec),
np.array(static_loss_rec),
rtol=rtol,
atol=atol,
equal_nan=True),
msg='Failed to do the imperative qat.')
def test_qat(self):
self.run_qat_save()
if __name__ == '__main__':
......
......@@ -31,6 +31,8 @@ from paddle.nn import Linear, Conv2D, Softmax, BatchNorm
from paddle.fluid.dygraph.nn import Pool2D
from paddle.fluid.log_helper import get_logger
from imperative_test_utils import fix_model_dict, train_lenet, ImperativeLenetWithSkipQuant
os.environ["CPU_NUM"] = "1"
if core.is_compiled_with_cuda():
fluid.set_flags({"FLAGS_cudnn_deterministic": True})
......@@ -39,144 +41,33 @@ _logger = get_logger(
__name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s')
class ImperativeLenet(fluid.dygraph.Layer):
def __init__(self, num_classes=10):
super(ImperativeLenet, self).__init__()
conv2d_w1_attr = fluid.ParamAttr(name="conv2d_w_1")
conv2d_w2_attr = fluid.ParamAttr(name="conv2d_w_2")
fc_w1_attr = fluid.ParamAttr(name="fc_w_1")
fc_w2_attr = fluid.ParamAttr(name="fc_w_2")
fc_w3_attr = fluid.ParamAttr(name="fc_w_3")
conv2d_b1_attr = fluid.ParamAttr(name="conv2d_b_1")
conv2d_b2_attr = fluid.ParamAttr(name="conv2d_b_2")
fc_b1_attr = fluid.ParamAttr(name="fc_b_1")
fc_b2_attr = fluid.ParamAttr(name="fc_b_2")
fc_b3_attr = fluid.ParamAttr(name="fc_b_3")
self.conv2d_0 = Conv2D(
in_channels=1,
out_channels=6,
kernel_size=3,
stride=1,
padding=1,
weight_attr=conv2d_w1_attr,
bias_attr=conv2d_b1_attr)
self.conv2d_0.skip_quant = True
self.batch_norm_0 = BatchNorm(6)
self.relu_0 = ReLU()
self.pool2d_0 = Pool2D(pool_size=2, pool_type='max', pool_stride=2)
self.conv2d_1 = Conv2D(
in_channels=6,
out_channels=16,
kernel_size=5,
stride=1,
padding=0,
weight_attr=conv2d_w2_attr,
bias_attr=conv2d_b2_attr)
self.conv2d_1.skip_quant = False
self.batch_norm_1 = BatchNorm(16)
self.relu6_0 = ReLU6()
self.pool2d_1 = Pool2D(pool_size=2, pool_type='max', pool_stride=2)
self.linear_0 = Linear(
in_features=400,
out_features=120,
weight_attr=fc_w1_attr,
bias_attr=fc_b1_attr)
self.linear_0.skip_quant = True
self.leaky_relu_0 = LeakyReLU()
self.linear_1 = Linear(
in_features=120,
out_features=84,
weight_attr=fc_w2_attr,
bias_attr=fc_b2_attr)
self.linear_1.skip_quant = False
self.sigmoid_0 = Sigmoid()
self.linear_2 = Linear(
in_features=84,
out_features=num_classes,
weight_attr=fc_w3_attr,
bias_attr=fc_b3_attr)
self.linear_2.skip_quant = False
self.softmax_0 = Softmax()
def forward(self, inputs):
x = self.conv2d_0(inputs)
x = self.batch_norm_0(x)
x = self.relu_0(x)
x = self.pool2d_0(x)
x = self.conv2d_1(x)
x = self.batch_norm_1(x)
x = self.relu6_0(x)
x = self.pool2d_1(x)
x = fluid.layers.flatten(x, 1)
x = self.linear_0(x)
x = self.leaky_relu_0(x)
x = self.linear_1(x)
x = self.sigmoid_0(x)
x = self.linear_2(x)
x = self.softmax_0(x)
return x
class TestImperativeOutSclae(unittest.TestCase):
def test_out_scale_acc(self):
seed = 1000
lr = 0.1
imperative_out_scale = ImperativeQuantAware()
qat = ImperativeQuantAware()
np.random.seed(seed)
reader = paddle.batch(
paddle.dataset.mnist.test(), batch_size=512, drop_last=True)
lenet = ImperativeLenet()
fixed_state = {}
for name, param in lenet.named_parameters():
p_shape = param.numpy().shape
p_value = param.numpy()
if name.endswith("bias"):
value = np.zeros_like(p_value).astype('float32')
else:
value = np.random.normal(
loc=0.0, scale=0.01,
size=np.product(p_shape)).reshape(p_shape).astype('float32')
fixed_state[name] = value
lenet.set_dict(fixed_state)
imperative_out_scale.quantize(lenet)
lenet = ImperativeLenetWithSkipQuant()
lenet = fix_model_dict(lenet)
qat.quantize(lenet)
adam = AdamOptimizer(
learning_rate=lr, parameter_list=lenet.parameters())
dynamic_loss_rec = []
lenet.train()
for batch_id, data in enumerate(reader()):
x_data = np.array([x[0].reshape(1, 28, 28)
for x in data]).astype('float32')
y_data = np.array(
[x[1] for x in data]).astype('int64').reshape(-1, 1)
img = fluid.dygraph.to_variable(x_data)
label = fluid.dygraph.to_variable(y_data)
out = lenet(img)
loss = fluid.layers.cross_entropy(out, label)
avg_loss = fluid.layers.mean(loss)
avg_loss.backward()
adam.minimize(avg_loss)
lenet.clear_gradients()
dynamic_loss_rec.append(avg_loss.numpy()[0])
if batch_id % 100 == 0:
_logger.info('{}: {}'.format('loss', avg_loss.numpy()))
loss_list = train_lenet(lenet, reader, adam)
lenet.eval()
path = "./save_dynamic_quant_infer_model/lenet"
save_dir = "./save_dynamic_quant_infer_model"
imperative_out_scale.save_quantized_model(
qat.save_quantized_model(
layer=lenet,
path=path,
input_spec=[
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import unittest
import numpy as np
import paddle
class TestFunctionalLayers(unittest.TestCase):
"""
"""
def setUp(self):
paddle.disable_static()
np.random.seed(1)
shape = [3, 100, 120]
self.x = paddle.to_tensor(np.random.random(shape))
self.y = paddle.to_tensor(np.random.random(shape))
def check(self, x, y):
self.assertTrue(np.allclose(x.numpy(), y.numpy()))
def test_quant_add(self):
out_1 = paddle.add(self.x, self.y)
out_2 = paddle.nn.quant.add()(self.x, self.y)
self.check(out_1, out_2)
def test_quant_subtract(self):
out_1 = paddle.subtract(self.x, self.y)
out_2 = paddle.nn.quant.subtract()(self.x, self.y)
self.check(out_1, out_2)
def test_quant_multiply(self):
out_1 = paddle.multiply(self.x, self.y)
out_2 = paddle.nn.quant.multiply()(self.x, self.y)
self.check(out_1, out_2)
def test_quant_divide(self):
out_1 = paddle.divide(self.x, self.y)
out_2 = paddle.nn.quant.divide()(self.x, self.y)
self.check(out_1, out_2)
def test_quant_reshape(self):
reshape = [120, 300]
out_1 = paddle.reshape(self.x, reshape)
out_2 = paddle.nn.quant.reshape()(self.x.clone(), reshape)
self.check(out_1, out_2)
self.assertTrue(out_1.shape == out_2.shape)
def test_quant_transpose(self):
perm = [1, 2, 0]
out_1 = paddle.transpose(self.x, perm)
out_2 = paddle.nn.quant.transpose()(self.x.clone(), perm)
self.check(out_1, out_2)
self.assertTrue(out_1.shape == out_2.shape)
def test_quant_concat(self):
out_1 = paddle.concat([self.x, self.y], axis=0)
out_2 = paddle.nn.quant.concat()([self.x, self.y], 0)
self.check(out_1, out_2)
self.assertTrue(out_1.shape == out_2.shape)
def test_quant_flatten(self):
start_axis = 1
end_axis = 2
out_1 = paddle.flatten(self.x, start_axis, end_axis)
out_2 = paddle.nn.quant.flatten()(self.x.clone(), start_axis, end_axis)
self.check(out_1, out_2)
self.assertTrue(out_1.shape == out_2.shape)
if __name__ == '__main__':
unittest.main()
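These checks confirm that each paddle.nn.quant wrapper is numerically identical to the plain tensor op it forwards to. A minimal sketch of the intended usage inside a model (the ToyBlock name is hypothetical; only paddle.nn.quant.add from this change is assumed):
import paddle
class ToyBlock(paddle.nn.Layer):
    def __init__(self):
        super(ToyBlock, self).__init__()
        # Holding the functional op as a sub-layer (instead of calling paddle.add
        # directly in forward) lets quantization tooling discover and wrap it,
        # just like Conv2D or Linear.
        self.add = paddle.nn.quant.add()
    def forward(self, x, y):
        return self.add(x, y)
block = ToyBlock()
out = block(paddle.rand([2, 3]), paddle.rand([2, 3]))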
......@@ -138,6 +138,7 @@ from ..fluid.dygraph.container import Sequential # noqa: F401
from . import utils # noqa: F401
from . import functional # noqa: F401
from . import initializer # noqa: F401
from . import quant # noqa: F401
#TODO: remove 'diag_embed', 'remove_weight_norm', 'weight_norm' months later.
import paddle.utils.deprecated as deprecated
......
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .functional_layers import FloatFunctionalLayer # noqa: F401
from .functional_layers import add # noqa: F401
from .functional_layers import subtract # noqa: F401
from .functional_layers import multiply # noqa: F401
from .functional_layers import divide # noqa: F401
from .functional_layers import reshape # noqa: F401
from .functional_layers import transpose # noqa: F401
from .functional_layers import concat # noqa: F401
from .functional_layers import flatten # noqa: F401
__all__ = []
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from ...fluid.dygraph import layers
from ...tensor import math, manipulation
__all__ = []
class FloatFunctionalLayer(layers.Layer):
def __init__(self):
super(FloatFunctionalLayer, self).__init__()
class add(FloatFunctionalLayer):
def __init__(self):
super(add, self).__init__()
def forward(self, x, y, name=None):
return math.add(x, y, name)
class subtract(FloatFunctionalLayer):
def __init__(self):
super(subtract, self).__init__()
def forward(self, x, y, name=None):
return math.subtract(x, y, name)
class multiply(FloatFunctionalLayer):
def __init__(self):
super(multiply, self).__init__()
def forward(self, x, y, name=None):
return math.multiply(x, y, name)
class divide(FloatFunctionalLayer):
def __init__(self):
super(divide, self).__init__()
def forward(self, x, y, name=None):
return math.divide(x, y, name)
class reshape(FloatFunctionalLayer):
def __init__(self):
super(reshape, self).__init__()
def forward(self, x, shape, name=None):
return manipulation.reshape(x, shape, name)
class transpose(FloatFunctionalLayer):
def __init__(self):
super(transpose, self).__init__()
def forward(self, x, perm, name=None):
return manipulation.transpose(x, perm, name)
class concat(FloatFunctionalLayer):
def __init__(self):
super(concat, self).__init__()
def forward(self, x, axis=0, name=None):
return manipulation.concat(x, axis, name)
class flatten(FloatFunctionalLayer):
def __init__(self):
super(flatten, self).__init__()
def forward(self, x, start_axis=0, stop_axis=-1, name=None):
return manipulation.flatten(x, start_axis, stop_axis, name)
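Every wrapper above follows one pattern: a stateless Layer whose forward simply delegates to the matching tensor op. A hedged sketch of how a further wrapper could be written in the same style; the matmul class below is hypothetical, is not part of this change, and uses the public paddle API rather than the internal math/manipulation modules:
import paddle
class matmul(paddle.nn.Layer):
    # Hypothetical wrapper in the style of FloatFunctionalLayer: no state, just
    # a forward that delegates to the corresponding tensor op.
    def __init__(self):
        super(matmul, self).__init__()
    def forward(self, x, y, name=None):
        return paddle.matmul(x, y, name=name)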
......@@ -213,6 +213,7 @@ packages=['paddle',
'paddle.nn',
'paddle.nn.functional',
'paddle.nn.layer',
'paddle.nn.quant',
'paddle.nn.initializer',
'paddle.nn.utils',
'paddle.metric',
......