diff --git a/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py b/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py index bcd2ad2b1fa64c6ba98318d39a2074aeeb84369e..58bfc58dccc73eb613b2f18ba60dc6340dadfddc 100644 --- a/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py +++ b/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py @@ -23,9 +23,10 @@ from paddle.fluid.dygraph.io import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX from paddle.nn import Linear, Conv2D from paddle.fluid.dygraph.nn import BatchNorm, Pool2D, Conv2DTranspose from paddle.fluid.io import load_inference_model, save_inference_model -from paddle.nn.layer.activation import ReLU, LeakyReLU, Sigmoid, ReLU6, Tanh, Softmax, PReLU +from paddle.nn.layer.activation import ReLU, LeakyReLU, Sigmoid, ReLU6, Tanh, Softmax, PReLU, Swish from paddle.fluid.log_helper import get_logger from . import quant_nn +from .. import quantization_pass __all__ = ['ImperativeQuantAware', 'ImperativeCalcOutScale'] @@ -45,6 +46,7 @@ _op_real_in_out_name = { "tanh": [["X"], ["Out"]], "batch_norm": [["X"], ["Y"]], "sigmoid": [["X"], ["Out"]], + "swish": [["X"], ["Out"]], } @@ -109,7 +111,12 @@ class ImperativeQuantAware(object): activation and returns dequantized activation. If None, will use quantization op defined by 'activation_quantize_type'. Default is None. - Examples: + Note: + If the user sets the attribute 'skip_quant' of a Layer that supports dynamic quantization + to True, the layer would not be quantized during training. If this attribute is not set + or is set to False, the Layer would be quantized in training. + + Examples 1: .. code-block:: python import paddle from paddle.fluid.contrib.slim.quantization \ import ImperativeQuantAware @@ -126,18 +133,62 @@ class ImperativeQuantAware(object): # Add the fake quant logical. # The original model will be rewrite. + # The outscale of outputs in supported layers would be calculated. imperative_qat.quantize(model) # Fine-tune the quantized model # ... # Save quant model for the inference. - paddle.jit.save( + imperative_qat.save_quantized_model( layer=model, model_path="./resnet50_qat", input_spec=[ paddle.static.InputSpec( shape=[None, 3, 224, 224], dtype='float32')]) + + Examples 2: + .. code-block:: python + + import paddle + from paddle.fluid.contrib.slim.quantization \ + import ImperativeQuantAware + + class ImperativeModel(paddle.nn.Layer): + def __init__(self): + super(ImperativeModel, self).__init__() + # self.linear_0 would skip the quantization. + self.linear_0 = paddle.nn.Linear(784, 400) + self.linear_0.skip_quant = True + + # self.linear_1 would not skip the quantization. + self.linear_1 = paddle.nn.Linear(400, 10) + self.linear_1.skip_quant = False + + def forward(self, inputs): + x = self.linear_0(inputs) + x = self.linear_1(x) + return x + + model = ImperativeModel() + imperative_qat = ImperativeQuantAware( + weight_quantize_type='abs_max', + activation_quantize_type='moving_average_abs_max') + + # Add the fake quant logic. + # The original model will be rewritten. + # + # Only one Layer (self.linear_1) would have the + # fake quant logic added. + imperative_qat.quantize(model) + + # Fine-tune the quantized model + # ... + + # Save quant model for the inference. 
+ imperative_qat.save_quantized_model( + layer=model, + path="./imperative_model_qat") """ super(ImperativeQuantAware, self).__init__() self._weight_bits = weight_bits @@ -150,6 +201,7 @@ class ImperativeQuantAware(object): self._act_pre_layer = act_preprocess_layer self._weight_quant_layer = weight_quantize_layer self._act_quant_layer = act_quantize_layer + self._out_scale = ImperativeCalcOutScale() t_check = lambda method: method is None or issubclass(method, dygraph.layers.Layer) assert t_check( @@ -189,7 +241,7 @@ class ImperativeQuantAware(object): """ According to weights' and activations' quantization types, the model will be added some fake quant ops, such as fake_quantize_dequantize_moving_average_abs_max, fake_quantize_dequantize_abs_max - and so on. + and so on. At the same time, the out_scale values of the outputs would be calculated. Args: model(fluid.dygraph.Layer): the model to be quantized. @@ -199,6 +251,9 @@ class ImperativeQuantAware(object): for name, layer in model.named_sublayers(): if not isinstance(layer, self._quantizable_layer_type): continue + if hasattr(layer, "skip_quant") and layer.skip_quant == True: + continue + scopes = name.split('.') target = scopes[-1] obj = model @@ -210,6 +265,8 @@ class ImperativeQuantAware(object): quant_layer = self._get_quantized_counterpart(layer) setattr(obj, target, quant_layer) + self._out_scale.calc_out_scale(model) + def _get_quantized_counterpart(self, layer): quant_layers = tuple(self._quant_layers_map.values()) quantized_counterpart = tuple('Quantized' + k @@ -233,47 +290,24 @@ class ImperativeQuantAware(object): self._weight_quant_layer, self._act_quant_layer) return quantized_layer + def save_quantized_model(self, layer, path, input_spec=None, **config): + self._out_scale.save_quantized_model(layer, path, input_spec, **config) + class ImperativeCalcOutScale(object): - def __init__(self, - moving_rate=0.9, - target_layer_types=[ - 'BatchNorm', 'Conv2D', 'Conv2DTranspose', 'LeakyReLU', - 'Linear', 'PReLU', 'Pool2D', 'ReLU', 'ReLU6', 'Sigmoid', - 'Softmax', 'Tanh' - ]): + def __init__(self, moving_rate=0.9): """ Add the logic of calculating and setting output quantization scales of some layers. These output quantization scales may be used by tensorRT or some other inference engines. Args: moving_rate(float): The decay coefficient of moving average. The default value is 0.9. - quantizable_op_type(list[str]): List the type of layers that will be calculated out_scale. 
- Default is ['Conv2D', 'ReLU', 'PReLU', 'LeakyReLU', 'Linear', 'Sigmoid', 'BatchNorm', 'ReLU6', 'Tanh', 'Softmax', 'Conv2DTranspose'] """ super(ImperativeCalcOutScale, self).__init__() self._moving_rate = moving_rate - self._out_scale_layers_map = { - 'BatchNorm': BatchNorm, - 'Conv2D': Conv2D, - 'Conv2DTranspose': Conv2DTranspose, - 'LeakyReLU': LeakyReLU, - 'Linear': Linear, - 'PReLU': PReLU, - 'Pool2D': Pool2D, - 'ReLU': ReLU, - 'ReLU6': ReLU6, - 'Sigmoid': Sigmoid, - 'Softmax': Softmax, - 'Tanh': Tanh - } - self._out_scale_layer_type = tuple( - self._out_scale_layers_map[layer] - if layer in self._out_scale_layers_map else layer - for layer in target_layer_types) - for layer in self._out_scale_layer_type: - assert not isinstance( - layer, str), "{} is unspported to be out_scaled.".format(layer) + self._out_scale_layer_type_list = ( + BatchNorm, Conv2D, Conv2DTranspose, LeakyReLU, Linear, PReLU, + Pool2D, ReLU, ReLU6, Sigmoid, Softmax, Tanh, Swish) self._register_hook_handle_list = [] self._out_scale_dict = {} @@ -290,26 +324,12 @@ class ImperativeCalcOutScale(object): assert isinstance( model, dygraph.Layer), "model must be the instance of dygraph.Layer" for _, layer in model.named_sublayers(): - if not isinstance(layer, self._out_scale_layer_type): + if not isinstance(layer, self._out_scale_layer_type_list): continue forward_post_hook_handle = layer.register_forward_post_hook( self._forward_post_hook) self._register_hook_handle_list.append(forward_post_hook_handle) - # Get the output var name of the op - def _get_op_output_names(self, op): - assert isinstance( - op, framework.Operator), "The input op should be Operator." - var_names = [] - name_list = _op_real_in_out_name[op.type][1] - for name in name_list: - var_name = op.output(name) - if isinstance(var_name, list): - var_names.extend(var_name) - else: - var_names.append(var_name) - return var_names - def save_quantized_model(self, layer, path, input_spec=None, **config): """ Save the quantized model for the inference. @@ -335,6 +355,7 @@ class ImperativeCalcOutScale(object): assert isinstance( layer, dygraph.Layer), "model must be the instance of dygraph.Layer" + is_dynamic_mode = False with dygraph.guard(): layer.eval() for handle in self._register_hook_handle_list: @@ -345,6 +366,10 @@ class ImperativeCalcOutScale(object): paddle.jit.save(layer=layer, path=path, input_spec=input_spec, **config) + if paddle.in_dynamic_mode(): + is_dynamic_mode = True + paddle.enable_static() + if core.is_compiled_with_cuda(): place = core.CUDAPlace(0) else: @@ -369,7 +394,8 @@ class ImperativeCalcOutScale(object): for block in inference_program.blocks: for op in block.ops: if op.type in _op_real_in_out_name: - output_var_names = self._get_op_output_names(op) + output_var_names = quantization_pass._get_op_output_var_names( + op) for output_var_name in output_var_names: output_var_tensor = block.var(output_var_name) if output_var_tensor.dtype not in [ @@ -386,6 +412,8 @@ class ImperativeCalcOutScale(object): # to dygraph Layer by the name of output. And use dict to save # the corresponding relationship between the dygraph Layer and the # static graph op that needs to set the outscale attribute. + if '.' 
not in output_var_name: + continue dynamic_layer_name, var_name_suffix = output_var_name.split( ".") if dynamic_layer_name in layer_var_dict: @@ -420,9 +448,12 @@ class ImperativeCalcOutScale(object): model_filename=model_filename, params_filename=params_filename) + if is_dynamic_mode: + paddle.disable_static() + def _forward_post_hook(self, layer, input, output): assert isinstance( - output, core.VarBase + output, (core.VarBase, framework.Variable) ), "Multiple outputs are not currently supported in ImperativeOutScale." if output.dtype not in [ core.VarDesc.VarType.FP32, core.VarDesc.VarType.FP64 diff --git a/python/paddle/fluid/contrib/slim/tests/test_imperative_out_scale.py b/python/paddle/fluid/contrib/slim/tests/test_imperative_out_scale.py index 3fc8352493d93406d1f82082df268cbb04a244fc..a900096a995227223e223cd473fbd18a29812f70 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_imperative_out_scale.py +++ b/python/paddle/fluid/contrib/slim/tests/test_imperative_out_scale.py @@ -25,12 +25,13 @@ import paddle.fluid.layers as layers from paddle.fluid import core from paddle.fluid.optimizer import AdamOptimizer from paddle.fluid.framework import IrGraph -from paddle.fluid.contrib.slim.quantization import ImperativeCalcOutScale -from paddle.fluid.contrib.slim.quantization import OutScaleForTrainingPass, OutScaleForInferencePass +from paddle.fluid.contrib.slim.quantization import ImperativeQuantAware +from paddle.fluid.contrib.slim.quantization import OutScaleForTrainingPass, OutScaleForInferencePass, QuantizationTransformPass from paddle.fluid.dygraph.container import Sequential from paddle.fluid.dygraph.io import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX from paddle.nn.layer import ReLU, LeakyReLU, Sigmoid, Softmax, ReLU6 -from paddle.fluid.dygraph.nn import BatchNorm, Conv2D, Linear, Pool2D +from paddle.nn import Linear, Conv2D, Softmax, BatchNorm +from paddle.fluid.dygraph.nn import Pool2D from paddle.fluid.log_helper import get_logger paddle.enable_static() @@ -91,10 +92,10 @@ def StaticLenet(data, num_classes=10, classifier_activation='softmax'): sigmoid1 = layers.sigmoid(fc2) fc3 = fluid.layers.fc(input=sigmoid1, size=num_classes, - act=classifier_activation, param_attr=fc_w3_attr, bias_attr=fc_b3_attr) - return fc3 + softmax1 = layers.softmax(fc3, use_cudnn=True) + return softmax1 class ImperativeLenet(fluid.dygraph.Layer): @@ -112,24 +113,24 @@ class ImperativeLenet(fluid.dygraph.Layer): fc_b3_attr = fluid.ParamAttr(name="fc_b_3") self.features = Sequential( Conv2D( - num_channels=1, - num_filters=6, - filter_size=3, + in_channels=1, + out_channels=6, + kernel_size=3, stride=1, padding=1, - param_attr=conv2d_w1_attr, + weight_attr=conv2d_w1_attr, bias_attr=conv2d_b1_attr), BatchNorm(6), ReLU(), Pool2D( pool_size=2, pool_type='max', pool_stride=2), Conv2D( - num_channels=6, - num_filters=16, - filter_size=5, + in_channels=6, + out_channels=16, + kernel_size=5, stride=1, padding=0, - param_attr=conv2d_w2_attr, + weight_attr=conv2d_w2_attr, bias_attr=conv2d_b2_attr), BatchNorm(16), ReLU6(), @@ -138,23 +139,23 @@ class ImperativeLenet(fluid.dygraph.Layer): self.fc = Sequential( Linear( - input_dim=400, - output_dim=120, - param_attr=fc_w1_attr, + in_features=400, + out_features=120, + weight_attr=fc_w1_attr, bias_attr=fc_b1_attr), LeakyReLU(), Linear( - input_dim=120, - output_dim=84, - param_attr=fc_w2_attr, + in_features=120, + out_features=84, + weight_attr=fc_w2_attr, bias_attr=fc_b2_attr), Sigmoid(), Linear( - input_dim=84, - act=classifier_activation, - 
output_dim=num_classes, - param_attr=fc_w3_attr, - bias_attr=fc_b3_attr)) + in_features=84, + out_features=num_classes, + weight_attr=fc_w3_attr, + bias_attr=fc_b3_attr), + Softmax()) def forward(self, inputs): x = self.features(inputs) @@ -165,105 +166,6 @@ class ImperativeLenet(fluid.dygraph.Layer): class TestImperativeOutSclae(unittest.TestCase): - def test_calc_out_scale_save(self): - imperative_out_scale = ImperativeCalcOutScale() - - with fluid.dygraph.guard(): - lenet = ImperativeLenet() - adam = AdamOptimizer( - learning_rate=0.001, parameter_list=lenet.parameters()) - train_reader = paddle.batch( - paddle.dataset.mnist.train(), batch_size=32, drop_last=True) - test_reader = paddle.batch( - paddle.dataset.mnist.test(), batch_size=32) - imperative_out_scale.calc_out_scale(lenet) - epoch_num = 1 - for epoch in range(epoch_num): - lenet.train() - for batch_id, data in enumerate(train_reader()): - x_data = np.array([x[0].reshape(1, 28, 28) - for x in data]).astype('float32') - y_data = np.array( - [x[1] for x in data]).astype('int64').reshape(-1, 1) - - img = fluid.dygraph.to_variable(x_data) - label = fluid.dygraph.to_variable(y_data) - out = lenet(img) - acc = fluid.layers.accuracy(out, label) - loss = fluid.layers.cross_entropy(out, label) - avg_loss = fluid.layers.mean(loss) - avg_loss.backward() - adam.minimize(avg_loss) - lenet.clear_gradients() - if batch_id % 100 == 0: - _logger.info( - "Train | At epoch {} step {}: loss = {:}, acc= {:}". - format(epoch, batch_id, - avg_loss.numpy(), acc.numpy())) - lenet.eval() - for batch_id, data in enumerate(test_reader()): - x_data = np.array([x[0].reshape(1, 28, 28) - for x in data]).astype('float32') - y_data = np.array( - [x[1] for x in data]).astype('int64').reshape(-1, 1) - - img = fluid.dygraph.to_variable(x_data) - label = fluid.dygraph.to_variable(y_data) - - out = lenet(img) - acc_top1 = fluid.layers.accuracy( - input=out, label=label, k=1) - acc_top5 = fluid.layers.accuracy( - input=out, label=label, k=5) - - if batch_id % 100 == 0: - _logger.info( - "Test | At epoch {} step {}: acc1 = {:}, acc5 = {:}". 
- format(epoch, batch_id, - acc_top1.numpy(), acc_top5.numpy())) - - # save weights - model_dict = lenet.state_dict() - fluid.save_dygraph(model_dict, "save_temp") - - # test the correctness of `save_quantized_model` - data = next(test_reader()) - test_data = np.array([x[0].reshape(1, 28, 28) - for x in data]).astype('float32') - test_img = fluid.dygraph.to_variable(test_data) - lenet.eval() - before_save = lenet(test_img) - - # save inference quantized model - path = "./outscale_infer_model/lenet" - save_dir = "./outscale_infer_model" - imperative_out_scale.save_quantized_model( - layer=lenet, - path=path, - input_spec=[ - paddle.static.InputSpec( - shape=[None, 1, 28, 28], dtype='float32') - ]) - - if core.is_compiled_with_cuda(): - place = core.CUDAPlace(0) - else: - place = core.CPUPlace() - exe = fluid.Executor(place) - [inference_program, feed_target_names, fetch_targets] = ( - fluid.io.load_inference_model( - dirname=save_dir, - executor=exe, - model_filename="lenet" + INFER_MODEL_SUFFIX, - params_filename="lenet" + INFER_PARAMS_SUFFIX)) - after_save, = exe.run(inference_program, - feed={feed_target_names[0]: test_data}, - fetch_list=fetch_targets) - - self.assertTrue( - np.allclose(after_save, before_save.numpy()), - msg='Failed to save the inference quantized model.') - def test_out_scale_acc(self): def _build_static_lenet(main, startup, is_test=False, seed=1000): with fluid.unique_name.guard(): @@ -285,6 +187,8 @@ class TestImperativeOutSclae(unittest.TestCase): reader = paddle.batch( paddle.dataset.mnist.test(), batch_size=32, drop_last=True) + weight_quantize_type = 'abs_max' + activation_quant_type = 'moving_average_abs_max' param_init_map = {} seed = 1000 lr = 0.1 @@ -295,7 +199,7 @@ class TestImperativeOutSclae(unittest.TestCase): _logger.info( "--------------------------dynamic graph qat--------------------------" ) - imperative_out_scale = ImperativeCalcOutScale() + imperative_out_scale = ImperativeQuantAware() with fluid.dygraph.guard(): np.random.seed(seed) @@ -315,7 +219,7 @@ class TestImperativeOutSclae(unittest.TestCase): fixed_state[name] = value param_init_map[param.name] = value lenet.set_dict(fixed_state) - imperative_out_scale.calc_out_scale(lenet) + imperative_out_scale.quantize(lenet) adam = AdamOptimizer( learning_rate=lr, parameter_list=lenet.parameters()) dynamic_loss_rec = [] @@ -340,11 +244,9 @@ class TestImperativeOutSclae(unittest.TestCase): _logger.info('{}: {}'.format('loss', avg_loss.numpy())) lenet.eval() - op_object_list = (Conv2D, ReLU, ReLU6, LeakyReLU, Sigmoid, Pool2D, - BatchNorm) path = "./dynamic_outscale_infer_model/lenet" - save_dir = "./dynamic_outscale_infer_model" + dynamic_save_dir = "./dynamic_outscale_infer_model" imperative_out_scale.save_quantized_model( layer=lenet, @@ -384,8 +286,16 @@ class TestImperativeOutSclae(unittest.TestCase): param_tensor.set(param_init_map[param.name], place) main_graph = IrGraph(core.Graph(main.desc), for_test=False) infer_graph = IrGraph(core.Graph(infer.desc), for_test=True) - transform_pass = OutScaleForTrainingPass(scope=scope, place=place) + transform_pass = QuantizationTransformPass( + scope=scope, + place=place, + activation_quantize_type=activation_quant_type, + weight_quantize_type=weight_quantize_type, + quantizable_op_type=['conv2d', 'depthwise_conv2d', 'mul']) transform_pass.apply(main_graph) + transform_pass.apply(infer_graph) + outscale_pass = OutScaleForTrainingPass(scope=scope, place=place) + outscale_pass.apply(main_graph) build_strategy = fluid.BuildStrategy() 
build_strategy.fuse_all_reduce_ops = False binary = fluid.CompiledProgram(main_graph.graph).with_data_parallel( @@ -404,20 +314,18 @@ class TestImperativeOutSclae(unittest.TestCase): scale_inference_pass = OutScaleForInferencePass(scope=scope) scale_inference_pass.apply(infer_graph) - out_scale_op_list = [ - "batch_norm", "conv2d", "leaky_relu", "pool2d", "relu6", "relu", - "sigmoid", "tanh", "relu6", "softmax", "conv2d_transpose", - "elementwise_add" - ] - op_nodes = infer_graph.all_op_nodes() - for op_node in op_nodes: - if op_node.name() in out_scale_op_list: - static_out_scale_list.append(op_node.op().attr("out_threshold")) - save_program = infer_graph.to_program() + static_save_dir = "./static_outscale_infer_model" with fluid.scope_guard(scope): - fluid.io.save_inference_model("./static_mnist", [infer_img.name], - [infer_pre], exe, save_program) + fluid.io.save_inference_model( + dirname=static_save_dir, + feeded_var_names=[infer_img.name], + target_vars=[infer_pre], + executor=exe, + main_program=save_program, + model_filename="lenet" + INFER_MODEL_SUFFIX, + params_filename="lenet" + INFER_PARAMS_SUFFIX) + rtol = 1e-05 atol = 1e-08 for i, (loss_d, @@ -437,24 +345,38 @@ class TestImperativeOutSclae(unittest.TestCase): atol=atol, equal_nan=True), msg='Failed to do the imperative qat.') + # load dynamic model - [inference_program, feed_target_names, fetch_targets] = ( + [dynamic_inference_program, feed_target_names, fetch_targets] = ( fluid.io.load_inference_model( - dirname=save_dir, + dirname=dynamic_save_dir, executor=exe, model_filename="lenet" + INFER_MODEL_SUFFIX, params_filename="lenet" + INFER_PARAMS_SUFFIX)) + # load static model + [static_inference_program, feed_target_names, fetch_targets] = ( + fluid.io.load_inference_model( + dirname=static_save_dir, + executor=exe, + model_filename="lenet" + INFER_MODEL_SUFFIX, + params_filename="lenet" + INFER_PARAMS_SUFFIX)) + + dynamic_ops = dynamic_inference_program.global_block().ops + static_ops = static_inference_program.global_block().ops + + for op in dynamic_ops[:]: + if op.type == "flatten2" or 'fake' in op.type: + dynamic_ops.remove(op) - global_block = inference_program.global_block() - for op in global_block.ops: - if op.has_attr('out_threshold'): - dynamic_out_scale_list.append(op.attr('out_threshold')) + for op in static_ops[:]: + if 'fake' in op.type: + static_ops.remove(op) - check_list = [ - False for item in dynamic_out_scale_list - if item not in static_out_scale_list - ] - self.assertTrue(len(check_list) == 0) + for i in range(len(dynamic_ops)): + if dynamic_ops[i].has_attr("out_threshold"): + self.assertTrue(dynamic_ops[i].type == static_ops[i].type) + self.assertTrue(dynamic_ops[i].attr("out_threshold") == + static_ops[i].attr("out_threshold")) if __name__ == '__main__': diff --git a/python/paddle/fluid/contrib/slim/tests/test_imperative_skip_op.py b/python/paddle/fluid/contrib/slim/tests/test_imperative_skip_op.py new file mode 100644 index 0000000000000000000000000000000000000000..d030d1eb51122048ebd6b994584dbd887a7d14bc --- /dev/null +++ b/python/paddle/fluid/contrib/slim/tests/test_imperative_skip_op.py @@ -0,0 +1,227 @@ +# copyright (c) 2018 paddlepaddle authors. all rights reserved. +# +# licensed under the apache license, version 2.0 (the "license"); +# you may not use this file except in compliance with the license. 
+# you may obtain a copy of the license at +# +# http://www.apache.org/licenses/license-2.0 +# +# unless required by applicable law or agreed to in writing, software +# distributed under the license is distributed on an "as is" basis, +# without warranties or conditions of any kind, either express or implied. +# see the license for the specific language governing permissions and +# limitations under the license. + +from __future__ import print_function + +import os +import numpy as np +import random +import unittest +import logging +import paddle +import paddle.fluid as fluid +import paddle.fluid.layers as layers +from paddle.fluid import core +from paddle.fluid.optimizer import AdamOptimizer +from paddle.fluid.contrib.slim.quantization import ImperativeQuantAware +from paddle.fluid.dygraph.io import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX +from paddle.nn.layer import ReLU, LeakyReLU, Sigmoid, Softmax, ReLU6 +from paddle.nn import Linear, Conv2D, Softmax, BatchNorm +from paddle.fluid.dygraph.nn import Pool2D +from paddle.fluid.log_helper import get_logger + +os.environ["CPU_NUM"] = "1" +if core.is_compiled_with_cuda(): + fluid.set_flags({"FLAGS_cudnn_deterministic": True}) + +_logger = get_logger( + __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s') + +quant_skip_pattern_list = ['skip_qat', 'skip_quant'] + + +class ImperativeLenet(fluid.dygraph.Layer): + def __init__(self, num_classes=10, classifier_activation='softmax'): + super(ImperativeLenet, self).__init__() + conv2d_w1_attr = fluid.ParamAttr(name="conv2d_w_1") + conv2d_w2_attr = fluid.ParamAttr(name="conv2d_w_2") + fc_w1_attr = fluid.ParamAttr(name="fc_w_1") + fc_w2_attr = fluid.ParamAttr(name="fc_w_2") + fc_w3_attr = fluid.ParamAttr(name="fc_w_3") + conv2d_b1_attr = fluid.ParamAttr(name="conv2d_b_1") + conv2d_b2_attr = fluid.ParamAttr(name="conv2d_b_2") + fc_b1_attr = fluid.ParamAttr(name="fc_b_1") + fc_b2_attr = fluid.ParamAttr(name="fc_b_2") + fc_b3_attr = fluid.ParamAttr(name="fc_b_3") + self.conv2d_0 = Conv2D( + in_channels=1, + out_channels=6, + kernel_size=3, + stride=1, + padding=1, + weight_attr=conv2d_w1_attr, + bias_attr=conv2d_b1_attr) + self.conv2d_0.skip_quant = True + + self.batch_norm_0 = BatchNorm(6) + self.relu_0 = ReLU() + self.pool2d_0 = Pool2D(pool_size=2, pool_type='max', pool_stride=2) + self.conv2d_1 = Conv2D( + in_channels=6, + out_channels=16, + kernel_size=5, + stride=1, + padding=0, + weight_attr=conv2d_w2_attr, + bias_attr=conv2d_b2_attr) + self.conv2d_1.skip_quant = False + + self.batch_norm_1 = BatchNorm(16) + self.relu6_0 = ReLU6() + self.pool2d_1 = Pool2D(pool_size=2, pool_type='max', pool_stride=2) + self.linear_0 = Linear( + in_features=400, + out_features=120, + weight_attr=fc_w1_attr, + bias_attr=fc_b1_attr) + self.linear_0.skip_quant = True + + self.leaky_relu_0 = LeakyReLU() + self.linear_1 = Linear( + in_features=120, + out_features=84, + weight_attr=fc_w2_attr, + bias_attr=fc_b2_attr) + self.linear_1.skip_quant = False + + self.sigmoid_0 = Sigmoid() + self.linear_2 = Linear( + in_features=84, + out_features=num_classes, + weight_attr=fc_w3_attr, + bias_attr=fc_b3_attr) + self.linear_2.skip_quant = False + self.softmax_0 = Softmax() + + def forward(self, inputs): + x = self.conv2d_0(inputs) + x = self.batch_norm_0(x) + x = self.relu_0(x) + x = self.pool2d_0(x) + x = self.conv2d_1(x) + x = self.batch_norm_1(x) + x = self.relu6_0(x) + x = self.pool2d_1(x) + + x = fluid.layers.flatten(x, 1) + + x = self.linear_0(x) + x = self.leaky_relu_0(x) + x = self.linear_1(x) + x = 
self.sigmoid_0(x) + x = self.linear_2(x) + x = self.softmax_0(x) + + return x + + +class TestImperativeOutSclae(unittest.TestCase): + def test_out_scale_acc(self): + seed = 1000 + lr = 0.1 + + imperative_out_scale = ImperativeQuantAware() + + np.random.seed(seed) + reader = paddle.batch( + paddle.dataset.mnist.test(), batch_size=32, drop_last=True) + lenet = ImperativeLenet() + fixed_state = {} + for name, param in lenet.named_parameters(): + p_shape = param.numpy().shape + p_value = param.numpy() + if name.endswith("bias"): + value = np.zeros_like(p_value).astype('float32') + else: + value = np.random.normal( + loc=0.0, scale=0.01, + size=np.product(p_shape)).reshape(p_shape).astype('float32') + fixed_state[name] = value + lenet.set_dict(fixed_state) + imperative_out_scale.quantize(lenet) + adam = AdamOptimizer( + learning_rate=lr, parameter_list=lenet.parameters()) + dynamic_loss_rec = [] + lenet.train() + for batch_id, data in enumerate(reader()): + x_data = np.array([x[0].reshape(1, 28, 28) + for x in data]).astype('float32') + y_data = np.array( + [x[1] for x in data]).astype('int64').reshape(-1, 1) + + img = fluid.dygraph.to_variable(x_data) + label = fluid.dygraph.to_variable(y_data) + + out = lenet(img) + loss = fluid.layers.cross_entropy(out, label) + avg_loss = fluid.layers.mean(loss) + avg_loss.backward() + adam.minimize(avg_loss) + lenet.clear_gradients() + dynamic_loss_rec.append(avg_loss.numpy()[0]) + if batch_id % 100 == 0: + _logger.info('{}: {}'.format('loss', avg_loss.numpy())) + + lenet.eval() + + path = "./save_dynamic_quant_infer_model/lenet" + save_dir = "./save_dynamic_quant_infer_model" + + imperative_out_scale.save_quantized_model( + layer=lenet, + path=path, + input_spec=[ + paddle.static.InputSpec( + shape=[None, 1, 28, 28], dtype='float32') + ]) + + paddle.enable_static() + + if core.is_compiled_with_cuda(): + place = core.CUDAPlace(0) + else: + place = core.CPUPlace() + exe = fluid.Executor(place) + + [inference_program, feed_target_names, fetch_targets] = ( + fluid.io.load_inference_model( + dirname=save_dir, + executor=exe, + model_filename="lenet" + INFER_MODEL_SUFFIX, + params_filename="lenet" + INFER_PARAMS_SUFFIX)) + model_ops = inference_program.global_block().ops + + conv2d_count, mul_count = 0, 0 + for i, op in enumerate(model_ops): + if op.type == 'conv2d': + if conv2d_count > 0: + self.assertTrue( + 'fake_quantize_dequantize' in model_ops[i - 1].type) + else: + self.assertTrue( + 'fake_quantize_dequantize' not in model_ops[i - 1].type) + conv2d_count += 1 + + if op.type == 'mul': + if mul_count > 0: + self.assertTrue( + 'fake_quantize_dequantize' in model_ops[i - 1].type) + else: + self.assertTrue( + 'fake_quantize_dequantize' not in model_ops[i - 1].type) + mul_count += 1 + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/contrib/slim/tests/test_moving_average_abs_max_scale_op.py b/python/paddle/fluid/contrib/slim/tests/test_moving_average_abs_max_scale_op.py index c947eeb31fc199563aa3cbd71d07b1b0f0faeee3..10c01566d05ee2778a62b0bc92a2887cf9f66caa 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_moving_average_abs_max_scale_op.py +++ b/python/paddle/fluid/contrib/slim/tests/test_moving_average_abs_max_scale_op.py @@ -73,7 +73,7 @@ class TestMovingAverageAbsMaxScaleOp(unittest.TestCase): feed_dict = {"image": img, "label": label} res = exe.run(binary, feed_dict) - def test_fw_bw(self): + def test_check_op_times(self): if core.is_compiled_with_cuda(): self.check_backward(use_cuda=True) 
self.check_backward(use_cuda=False)
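
Note for reviewers (not part of the diff): a minimal, self-contained sketch of the end-to-end flow these changes enable, i.e. marking layers with skip_quant, calling quantize(), and exporting with save_quantized_model(). The ToyModel definition, tensor shapes, and the "./toy_model_qat" path are illustrative assumptions; only the API calls come from the code above.

import numpy as np
import paddle
from paddle.fluid.contrib.slim.quantization import ImperativeQuantAware


class ToyModel(paddle.nn.Layer):
    def __init__(self):
        super(ToyModel, self).__init__()
        # Layers marked skip_quant=True are left un-quantized by quantize().
        self.linear_0 = paddle.nn.Linear(784, 400)
        self.linear_0.skip_quant = True
        # Unmarked layers (or skip_quant=False) get fake quant/dequant ops.
        self.linear_1 = paddle.nn.Linear(400, 10)
        self.linear_1.skip_quant = False

    def forward(self, x):
        x = self.linear_0(x)
        return self.linear_1(x)


model = ToyModel()
qat = ImperativeQuantAware(
    weight_quantize_type='abs_max',
    activation_quantize_type='moving_average_abs_max')
# Rewrites quantizable sublayers in place and registers the
# out-scale forward hooks added in this PR.
qat.quantize(model)

# A single forward pass stands in for fine-tuning here.
model.train()
x = paddle.to_tensor(np.random.rand(4, 784).astype('float32'))
out = model(x)

# Exports the inference program; supported ops get an out_threshold attribute.
qat.save_quantized_model(
    layer=model,
    path="./toy_model_qat",
    input_spec=[
        paddle.static.InputSpec(shape=[None, 784], dtype='float32')
    ])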