Unverified commit 72973d5a, authored by Z zhouzj, committed by GitHub

[clean fluid api] Move fluid/contrib/slim and remove fluid api. (#48717)

Parent a186e60d
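At a glance, the commit relocates the quantization tooling: the static-graph passes move from paddle.fluid.contrib.slim.quantization to paddle.static.quantization, and the imperative (dygraph) API is re-exported from paddle.quantization. A minimal before/after sketch of the import change, as reflected in the diffs below:

    # before this commit
    from paddle.fluid.contrib.slim.quantization import PostTrainingQuantization
    from paddle.fluid.contrib.slim.quantization import ImperativeQuantAware

    # after this commit
    from paddle.static.quantization import PostTrainingQuantization
    from paddle.quantization import ImperativeQuantAware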
@@ -119,7 +119,7 @@ if(WITH_TESTING)
   add_subdirectory(paddle/tests)
   add_subdirectory(paddle/fluid/tests)
   add_subdirectory(paddle/fluid/contrib/tests)
-  add_subdirectory(paddle/fluid/contrib/slim/tests)
+  add_subdirectory(paddle/static/quantization/tests)
 endif()
 if(NOT WITH_SETUP_INSTALL)
...
@@ -1617,9 +1617,7 @@ class Engine:
             fetch_vars = self._fetch_vars["predict"]['outputs']
             dist_main_prog = self._dist_main_progs["predict"][self._cur_rank]
             if self._strategy.qat.enable and self._strategy.qat.onnx_format:
-                from paddle.fluid.contrib.slim.quantization import (
-                    QuantWeightPass,
-                )
+                from paddle.static.quantization import QuantWeightPass
                 self._logger.info("export quantized model.")
                 self._logger.info(
...
@@ -18,14 +18,14 @@ import numpy as np
 import paddle
 from paddle.fluid import core, framework
-from paddle.fluid.contrib.slim.quantization import (
+from paddle.fluid.dygraph.parallel import ParallelEnv
+from paddle.static.quantization import (
     AddQuantDequantForInferencePass,
     AddQuantDequantPassV2,
     OutScaleForTrainingPass,
     QuantizationTransformPassV2,
     utils,
 )
-from paddle.fluid.dygraph.parallel import ParallelEnv
 from ..auto_parallel.converter import Converter
 from ..auto_parallel.dist_attribute import (
...
@@ -18,9 +18,6 @@ from . import memory_usage_calc
 from .memory_usage_calc import *
 from . import op_frequence
 from .op_frequence import *
-from . import quantize
-from .quantize import *
-from . import slim
 from . import extend_optimizer
 from .extend_optimizer import *
 from . import model_stat
@@ -36,7 +33,6 @@ __all__ = []
 __all__ += memory_usage_calc.__all__
 __all__ += op_frequence.__all__
-__all__ += quantize.__all__
 __all__ += extend_optimizer.__all__
 __all__ += ['mixed_precision']
 __all__ += layers.__all__
...
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import quantization_pass
from .quantization_pass import *
from . import quant_int8_mkldnn_pass
from .quant_int8_mkldnn_pass import *
from . import quant2_int8_mkldnn_pass
from .quant2_int8_mkldnn_pass import *
from . import post_training_quantization
from .post_training_quantization import *
from . import imperative
from .imperative import *
__all__ = []
__all__ += quantization_pass.__all__
__all__ += quant_int8_mkldnn_pass.__all__
__all__ += quant2_int8_mkldnn_pass.__all__
__all__ += post_training_quantization.__all__
__all__ += imperative.__all__
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import collections
import logging
import numpy as np
from .... import core
from ....framework import Program, Operator, Variable, program_guard
from ....executor import global_scope
from .... import unique_name
from ....layer_helper import LayerHelper
from ....param_attr import ParamAttr
from ....initializer import Constant
from ....log_helper import get_logger
_logger = get_logger(
__name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s'
)
def find_next_ops(block, var_name):
"""
Find all the ops that consume the given variable as an input.
"""
res_ops = []
for op in block.ops:
if var_name in op.input_arg_names:
res_ops.append(op)
return res_ops
def load_variable_data(scope, var_name):
'''
Load variable value from scope
'''
var_node = scope.find_var(var_name)
assert var_node is not None, "Cannot find " + var_name + " in scope."
return np.array(var_node.get_tensor())
class QuantizeTranspilerV2:
def __init__(
self,
weight_bits=8,
activation_bits=8,
weight_quantize_type='abs_max',
activation_quantize_type='moving_average_abs_max',
quantizable_op_type=[
'conv2d',
'depthwise_conv2d',
'mul',
],
skip_pattern=['skip_quant'],
):
"""
Apply fake quant for the quantized ops.
Args:
weight_bits(int): the bit width of the quantized weight.
activation_bits(int): the bit width of the quantized activation.
weight_quantize_type(str): the quantization type for weights.
Only 'abs_max' and 'channel_wise_abs_max' are supported.
activation_quantize_type(str): the quantization type for activations.
Only 'abs_max' and 'moving_average_abs_max' are supported.
quantizable_op_type(list[str]): the op types to be quantized.
skip_pattern(str|list): The user-defined quantization skip pattern, which
will be presented in the name scope of an op. When the skip pattern is
detected in an op's name scope, the corresponding op will not be quantized.
"""
self._weight_bits = weight_bits
self._activation_bits = activation_bits
assert activation_quantize_type in [
"abs_max",
"moving_average_abs_max",
], (
"activation_quantize_type should be abs_max "
"or moving_average_abs_max for now."
)
assert weight_quantize_type in [
"abs_max",
"channel_wise_abs_max",
], "weight_quantize_type should be abs_max or channel_wise_abs_max."
self._activation_quantize_type = activation_quantize_type
self._weight_quantize_type = weight_quantize_type
for op_type in quantizable_op_type:
assert op_type in [
'conv2d',
'depthwise_conv2d',
'mul',
], "Quantize op should be ['conv2d', 'depthwise_conv2d', 'mul']"
self._quantizable_ops = quantizable_op_type
self._quantizable_grad_ops = [
'%s_grad' % (op) for op in self._quantizable_ops
]
self._skip_pattern = skip_pattern
self._helper = LayerHelper(self.__class__.__name__)
self._moving_rate = 0.9
self._out_ch_axis1_ops = ['conv2d_transpose', 'mul', 'matmul']
def apply(self, program, startup_program, is_test=False):
"""
Apply quantization to fluid Program.
Args:
program(Program): the train or test program to be quantized.
startup_program(Program): the corresponding startup_program.
is_test(bool): Whether the program is used for testing.
Returns:
None
"""
assert isinstance(
program, Program
), "program must be the instance of Program"
assert isinstance(
startup_program, Program
), "startup_program must be the instance of Program"
var_rename_map = [
collections.OrderedDict() for _ in range(len(program.blocks))
]
with program_guard(program, startup_program):
for block in program.blocks:
ops = list(block.ops)
for op in ops:
if op.type in self._quantizable_ops and (
not self._is_skip_quant(op)
):
self._transform_forward(
block, op, var_rename_map, is_test
)
for block in program.blocks:
ops = list(block.ops)
for op in ops:
if op.type in self._quantizable_grad_ops and (
not self._is_skip_quant(op)
):
self._transform_backward(block, op, var_rename_map)
def convert(self, test_program, scope=None):
"""
Convert the test program.
Get the out scale from the moving_average_abs_max_scale op and save the
out scale into the quantized op.
Args:
test_program(Program): the test program to be converted.
scope(fluid.Scope, optional): The scope of the program, use it to load
and save variables. If scope=None, get scope by global_scope().
"""
scope = global_scope() if scope is None else scope
for block in test_program.blocks:
for op in block.ops:
if (
op.has_attr("quantization_type")
and op.attr("quantization_type") == "qat_with_weight"
):
# quant op -> var1 -> fake op -> var2
assert len(op.output_arg_names) == 1
var1_name = op.output_arg_names[0]
fake_ops = find_next_ops(block, var1_name)
assert len(fake_ops) == 1
fake_op = fake_ops[0]
assert fake_op.type == "moving_average_abs_max_scale"
out_scale_name = fake_op.output("OutScale")
out_threshold = load_variable_data(scope, out_scale_name[0])
op._set_attr("out_threshold", float(out_threshold))
var2_name = fake_op.output("Out")[0]
op._rename_output(var1_name, var2_name)
fake_op._rename_output(var2_name, var1_name)
def _transform_forward(self, block, op, var_rename_map, is_test):
"""
Insert fake quant op before the target ops.
"""
op._set_attr("quantization_type", "qat_with_weight")
# insert fake quant op before the quantized op
for in_name in op.input_arg_names:
block_id = block.idx
idx = block.ops.index(op)
if in_name in var_rename_map[block_id]:
new_in_name = var_rename_map[block_id][in_name]
else:
in_var = block.var(in_name)
target_dtype = [
core.VarDesc.VarType.FP32,
core.VarDesc.VarType.FP16,
]
if in_var.dtype not in target_dtype:
continue
quant_bits = (
self._weight_bits
if in_var.persistable
else self._activation_bits
)
quant_type = (
self._weight_quantize_type
if in_var.persistable
else self._activation_quantize_type
)
if quant_type == "abs_max":
new_var = self._insert_abs_max_fq_op(
block, idx, in_var, quant_bits
)
elif quant_type == "moving_average_abs_max":
new_var = self._insert_ma_abs_max_fq_op(
block, idx, in_var, quant_bits, is_test
)
elif quant_type == "channel_wise_abs_max":
ch_axis = 1 if op.type in self._out_ch_axis1_ops else 0
new_var = self._insert_pc_abs_max_fq_op(
block, idx, in_var, quant_bits, ch_axis
)
else:
_logger.error(
"Don't support the quant_type: %s" % quant_type
)
continue
new_in_name = new_var.name
var_rename_map[block_id][in_name] = new_in_name
op._rename_input(in_name, new_in_name)
# insert out scale op followed the quantized op
for out_name in op.output_arg_names:
next_ops = find_next_ops(block, out_name)
idx = block.ops.index(op)
out_var = block.var(out_name)
new_out_var = self._insert_ma_abs_max_scale_op(
block, idx + 1, out_var, is_test, True
)
for next_op in next_ops:
if "_grad" not in next_op.type:
next_op._rename_input(out_name, new_out_var.name)
def _is_skip_quant(self, op):
"""
Analyse whether the op should skip quantization or not.
"""
user_skipped = False
if isinstance(self._skip_pattern, list):
user_skipped = op.has_attr("op_namescope") and any(
pattern in op.attr("op_namescope")
for pattern in self._skip_pattern
)
elif isinstance(self._skip_pattern, str):
user_skipped = (
op.has_attr("op_namescope")
and op.attr("op_namescope").find(self._skip_pattern) != -1
)
return user_skipped
def _transform_backward(self, block, op, var_rename_map):
"""
Update the backward pass of the target ops.
Note: for the grad ops, only rename the inputs; skip renaming the outputs.
"""
block_id = block.idx
no_dequanted_input_vars = True
for name in op.input_arg_names:
if name in var_rename_map[block_id]:
new_var_name = var_rename_map[block_id][name]
op._rename_input(name, new_var_name)
no_dequanted_input_vars = False
if no_dequanted_input_vars:
raise ValueError(
"There are no dequantized inputs for op %s." % (op.type)
)
def _insert_abs_max_fq_op(self, block, idx, in_var, quant_bits):
"""
Insert abs max fake quant op.
"""
quant_dequant_var = block.create_var(
type=in_var.type,
name="{}.quant_dequant".format(in_var.name),
shape=in_var.shape,
dtype=in_var.dtype,
)
scale_var = self._helper.create_parameter(
attr=ParamAttr(
name="{}.quant_dequant.scale".format(in_var.name),
initializer=Constant(0.0),
trainable=False,
),
shape=[1],
dtype=in_var.dtype,
)
scale_var.stop_gradient = True
inputs = {'X': in_var}
outputs = {'Out': quant_dequant_var, 'OutScale': scale_var}
attrs = {'bit_length': quant_bits}
block._insert_op(
idx,
type='fake_quantize_dequantize_abs_max',
attrs=attrs,
inputs=inputs,
outputs=outputs,
)
return quant_dequant_var
def _insert_ma_abs_max_fq_op(self, block, idx, in_var, quant_bits, is_test):
"""
Insert moving average abs max fake quant op.
"""
quant_dequant_var = block.create_var(
type=in_var.type,
name="{}.quant_dequant".format(in_var.name),
shape=in_var.shape,
dtype=in_var.dtype,
)
scale_var = self._helper.create_parameter(
attr=ParamAttr(
name="{}.quant_dequant.scale".format(in_var.name),
initializer=Constant(0.0),
trainable=False,
),
shape=[1],
dtype=in_var.dtype,
)
scale_var.stop_gradient = True
if not is_test:
state_var = self._helper.create_parameter(
attr=ParamAttr(
name="{}.quant_dequant.state".format(in_var.name),
initializer=Constant(0),
trainable=False,
),
shape=[1],
dtype=in_var.dtype,
)
state_var.stop_gradient = True
accum_var = self._helper.create_parameter(
attr=ParamAttr(
name="{}.quant_dequant.accum".format(in_var.name),
initializer=Constant(0),
trainable=False,
),
shape=[1],
dtype=in_var.dtype,
)
accum_var.stop_gradient = True
attrs = {
'moving_rate': self._moving_rate,
'bit_length': quant_bits,
'is_test': is_test,
}
inputs = {'X': in_var, 'InScale': scale_var}
outputs = {'Out': quant_dequant_var, 'OutScale': scale_var}
if not is_test:
inputs['InState'] = state_var
inputs['InAccum'] = accum_var
outputs['OutState'] = state_var
outputs['OutAccum'] = accum_var
block._insert_op(
idx,
type='fake_quantize_dequantize_moving_average_abs_max',
attrs=attrs,
inputs=inputs,
outputs=outputs,
)
return quant_dequant_var
def _insert_pc_abs_max_fq_op(self, block, idx, in_var, quant_bits, ch_axis):
"""
Insert per channel abs max fake quant op.
"""
quant_dequant_var = block.create_var(
type=in_var.type,
name="{}.quant_dequant".format(in_var.name),
shape=in_var.shape,
dtype=in_var.dtype,
)
scale_var = self._helper.create_parameter(
attr=ParamAttr(
name="{}.quant_dequant.scale".format(in_var.name),
initializer=Constant(0.0),
trainable=False,
),
shape=[in_var.shape[ch_axis]],
dtype=in_var.dtype,
)
scale_var.stop_gradient = True
inputs = {'X': in_var}
outputs = {'Out': quant_dequant_var, 'OutScale': scale_var}
attrs = {'bit_length': quant_bits, 'quant_axis': ch_axis}
block._insert_op(
idx,
type='fake_channel_wise_quantize_dequantize_abs_max',
attrs=attrs,
inputs=inputs,
outputs=outputs,
)
return quant_dequant_var
def _insert_ma_abs_max_scale_op(
self, block, idx, in_var, is_test, has_out_var=False
):
"""
Insert moving average abs max scale op.
"""
scale_var = self._helper.create_parameter(
attr=ParamAttr(
name="{}.outscale.scale".format(in_var.name),
initializer=Constant(0.0),
trainable=False,
),
shape=[1],
dtype=in_var.dtype,
)
scale_var.stop_gradient = True
attrs = {'moving_rate': self._moving_rate, 'is_test': is_test}
inputs = {'X': in_var}
outputs = {'OutScale': scale_var}
if not is_test:
state_var = self._helper.create_parameter(
attr=ParamAttr(
name="{}.outscale.state".format(in_var.name),
initializer=Constant(0),
trainable=False,
),
shape=[1],
dtype=in_var.dtype,
)
state_var.stop_gradient = True
accum_var = self._helper.create_parameter(
attr=ParamAttr(
name="{}.outscale.accum".format(in_var.name),
initializer=Constant(0),
trainable=False,
),
shape=[1],
dtype=in_var.dtype,
)
accum_var.stop_gradient = True
inputs['InState'] = state_var
inputs['InAccum'] = accum_var
outputs['OutState'] = state_var
outputs['OutAccum'] = accum_var
if has_out_var:
out_var = block.create_var(
type=in_var.type,
name="{}.tmp".format(in_var.name),
shape=in_var.shape,
dtype=in_var.dtype,
)
outputs['Out'] = out_var
block._insert_op(
idx,
type='moving_average_abs_max_scale',
attrs=attrs,
inputs=inputs,
outputs=outputs,
)
if has_out_var:
return out_var
# copyright (c) 2018 paddlepaddle authors. all rights reserved.
#
# licensed under the apache license, version 2.0 (the "license");
# you may not use this file except in compliance with the license.
# you may obtain a copy of the license at
#
# http://www.apache.org/licenses/license-2.0
#
# unless required by applicable law or agreed to in writing, software
# distributed under the license is distributed on an "as is" basis,
# without warranties or conditions of any kind, either express or implied.
# see the license for the specific language governing permissions and
# limitations under the license.
import os
import unittest
import random
import numpy as np
import paddle.fluid as fluid
import paddle
from paddle.fluid.framework import IrGraph
from paddle.fluid.contrib.slim.quantization.quantize_transpiler_v2 import (
QuantizeTranspilerV2,
)
from paddle.fluid import core
paddle.enable_static()
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
os.environ["CPU_NUM"] = "1"
def conv_net(img, label):
conv_pool_1 = fluid.nets.simple_img_conv_pool(
input=img,
filter_size=5,
num_filters=20,
pool_size=2,
pool_stride=2,
pool_type='max',
act="relu",
)
conv_pool_2 = fluid.nets.simple_img_conv_pool(
input=conv_pool_1,
filter_size=5,
num_filters=50,
pool_size=2,
pool_stride=2,
pool_type='avg',
act="relu",
)
with fluid.name_scope("skip_quant"):
hidden = fluid.layers.fc(input=conv_pool_1, size=100, act='relu')
prediction = fluid.layers.fc(input=hidden, size=10, act='softmax')
loss = paddle.nn.functional.cross_entropy(
input=prediction, label=label, reduction='none', use_softmax=False
)
avg_loss = paddle.mean(loss)
return avg_loss
class TestQuantizeProgramPass(unittest.TestCase):
def quantize_program(
self,
use_cuda,
seed,
activation_quant_type='abs_max',
weight_quant_type='abs_max',
for_ci=False,
):
def build_program(main, startup, is_test):
main.random_seed = seed
startup.random_seed = seed
with fluid.unique_name.guard():
with fluid.program_guard(main, startup):
img = fluid.layers.data(
name='image', shape=[1, 28, 28], dtype='float32'
)
label = fluid.layers.data(
name='label', shape=[1], dtype='int64'
)
loss = conv_net(img, label)
if not is_test:
opt = fluid.optimizer.Adam(learning_rate=0.0001)
opt.minimize(loss)
return [img, label], loss
random.seed(0)
np.random.seed(0)
# 1 Define program
train_program = fluid.Program()
startup_program = fluid.Program()
test_program = fluid.Program()
feeds, loss = build_program(train_program, startup_program, False)
build_program(test_program, startup_program, True)
test_program = test_program.clone(for_test=True)
if not for_ci:
train_graph = IrGraph(
core.Graph(train_program.desc), for_test=False
)
train_graph.draw('.', 'train_program_1')
test_graph = IrGraph(core.Graph(test_program.desc), for_test=True)
test_graph.draw('.', 'test_program_1')
# 2 Apply quantization
qt = QuantizeTranspilerV2(
activation_quantize_type=activation_quant_type,
weight_quantize_type=weight_quant_type,
)
qt.apply(train_program, startup_program, is_test=False)
qt.apply(test_program, startup_program, is_test=True)
# 3 Train
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
exe = fluid.Executor(place)
scope = fluid.Scope()
with fluid.scope_guard(scope):
exe.run(startup_program)
if not for_ci:
train_graph = IrGraph(
core.Graph(train_program.desc), for_test=False
)
train_graph.draw('.', 'train_program_2')
test_graph = IrGraph(core.Graph(test_program.desc), for_test=True)
test_graph.draw('.', 'test_program_2')
build_strategy = fluid.BuildStrategy()
build_strategy.memory_optimize = False
build_strategy.enable_inplace = False
build_strategy.fuse_all_reduce_ops = False
binary = fluid.CompiledProgram(train_program).with_data_parallel(
loss_name=loss.name, build_strategy=build_strategy
)
iters = 5
batch_size = 8
train_reader = paddle.batch(
paddle.dataset.mnist.train(), batch_size=batch_size
)
feeder = fluid.DataFeeder(feed_list=feeds, place=place)
with fluid.scope_guard(scope):
for idx in range(iters):
data = next(train_reader())
loss_v = exe.run(
binary, feed=feeder.feed(data), fetch_list=[loss]
)
if not for_ci and idx % 20 == 0:
print('{}: {}'.format('loss', np.mean(loss_v)))
print('{}: {}'.format('loss', np.mean(loss_v)))
# 4 Convert
qt.convert(test_program, scope)
if not for_ci:
with fluid.scope_guard(scope):
fluid.io.save_inference_model(
'./infer_model',
['image', 'label'],
[loss],
exe,
test_program,
clip_extra=True,
)
def test_gpu_1(self):
if fluid.core.is_compiled_with_cuda():
self.quantize_program(
use_cuda=True,
seed=1,
activation_quant_type='abs_max',
weight_quant_type='abs_max',
for_ci=True,
)
def test_gpu_2(self):
if fluid.core.is_compiled_with_cuda():
self.quantize_program(
use_cuda=True,
seed=1,
activation_quant_type='moving_average_abs_max',
weight_quant_type='channel_wise_abs_max',
for_ci=True,
)
def test_cpu_1(self):
self.quantize_program(
use_cuda=False,
seed=2,
activation_quant_type='abs_max',
weight_quant_type='abs_max',
for_ci=True,
)
def test_cpu_2(self):
self.quantize_program(
use_cuda=False,
seed=2,
activation_quant_type='moving_average_abs_max',
weight_quant_type='channel_wise_abs_max',
for_ci=True,
)
if __name__ == '__main__':
unittest.main()
@@ -25,5 +25,4 @@ set_tests_properties(test_multi_precision_fp16_train PROPERTIES TIMEOUT 120)
 if(APPLE)
   set_tests_properties(test_model_cast_to_bf16 PROPERTIES TIMEOUT 300)
-  set_tests_properties(test_quantize_transpiler PROPERTIES TIMEOUT 300)
 endif()
# copyright (c) 2018 paddlepaddle authors. all rights reserved.
#
# licensed under the apache license, version 2.0 (the "license");
# you may not use this file except in compliance with the license.
# you may obtain a copy of the license at
#
# http://www.apache.org/licenses/license-2.0
#
# unless required by applicable law or agreed to in writing, software
# distributed under the license is distributed on an "as is" basis,
# without warranties or conditions of any kind, either express or implied.
# see the license for the specific language governing permissions and
# limitations under the license.
import numpy as np
import unittest
import paddle
import paddle.fluid as fluid
from paddle.fluid.contrib.quantize.quantize_transpiler import _original_var_name
from paddle.fluid.contrib.quantize.quantize_transpiler import QuantizeTranspiler
import paddle
paddle.enable_static()
def linear_fc(num):
data = fluid.layers.data(name='image', shape=[1, 32, 32], dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
hidden = data
for _ in range(num):
hidden = fluid.layers.fc(hidden, size=128, act='relu')
loss = paddle.nn.functional.cross_entropy(
input=hidden, label=label, reduction='none', use_softmax=False
)
loss = paddle.mean(loss)
return loss
def residual_block(num):
def conv_bn_layer(
input, ch_out, filter_size, stride, padding, act='relu', bias_attr=False
):
tmp = paddle.static.nn.conv2d(
input=input,
filter_size=filter_size,
num_filters=ch_out,
stride=stride,
padding=padding,
act=None,
bias_attr=bias_attr,
)
return paddle.static.nn.batch_norm(input=tmp, act=act)
data = fluid.layers.data(name='image', shape=[1, 32, 32], dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
hidden = data
for _ in range(num):
conv = conv_bn_layer(hidden, 16, 3, 1, 1, act=None, bias_attr=True)
short = conv_bn_layer(hidden, 16, 1, 1, 0, act=None)
hidden = paddle.nn.functional.relu(paddle.add(x=conv, y=short))
fc = fluid.layers.fc(input=hidden, size=10)
loss = paddle.nn.functional.cross_entropy(
input=fc, label=label, reduction='none', use_softmax=False
)
loss = paddle.mean(loss)
return loss
def conv_net(img, label):
conv_pool_1 = fluid.nets.simple_img_conv_pool(
input=img,
filter_size=5,
num_filters=20,
pool_size=2,
pool_stride=2,
act="relu",
)
conv_pool_1 = paddle.static.nn.batch_norm(conv_pool_1)
conv_pool_2 = fluid.nets.simple_img_conv_pool(
input=conv_pool_1,
filter_size=5,
num_filters=50,
pool_size=2,
pool_stride=2,
act="relu",
)
prediction = fluid.layers.fc(input=conv_pool_2, size=10, act='softmax')
loss = paddle.nn.functional.cross_entropy(
input=prediction, label=label, reduction='none', use_softmax=False
)
avg_loss = paddle.mean(loss)
return avg_loss
class TestQuantizeTranspiler(unittest.TestCase):
def setUp(self):
# since quant_op and dequant_op are not ready, use cos and sin for the test
self.weight_quant_op_type = 'fake_quantize_abs_max'
self.dequant_op_type = 'fake_dequantize_max_abs'
self.quantizable_op_and_inputs = {
'conv2d': ['Input', 'Filter'],
'depthwise_conv2d': ['Input', 'Filter'],
'mul': ['X', 'Y'],
}
self.quantizable_op_grad_and_inputs = {
'conv2d_grad': ['Input', 'Filter'],
'depthwise_conv2d_grad': ['Input', 'Filter'],
'mul_grad': ['X', 'Y'],
}
def check_program(self, program):
quantized_ops = {}
persistable_vars = [
v.name
for v in filter(lambda var: var.persistable, program.list_vars())
]
for block in program.blocks:
for idx, op in enumerate(block.ops):
# check forward
if op.type in self.quantizable_op_and_inputs:
for i, arg_name in enumerate(op.input_arg_names):
quant_op_type = (
self.weight_quant_op_type
if _original_var_name(arg_name) in persistable_vars
else self.act_quant_op_type
)
self.assertTrue(
arg_name.endswith('.quantized.dequantized')
)
if arg_name not in quantized_ops:
self.assertEqual(
block.ops[idx - 2 * i - 1].type,
self.dequant_op_type,
)
self.assertEqual(
block.ops[idx - 2 * i - 2].type, quant_op_type
)
quantized_ops[arg_name] = block.ops[idx - 2 * i - 2]
else:
op_idx = block.ops.index(quantized_ops[arg_name])
self.assertLess(op_idx, idx)
# check backward
if op.type in self.quantizable_op_grad_and_inputs:
for pname in self.quantizable_op_grad_and_inputs[op.type]:
arg_name = op.input(pname)[0]
self.assertTrue(
arg_name.endswith('.quantized.dequantized')
)
self.assertTrue(arg_name in quantized_ops)
def linear_fc_quant(self, quant_type):
main = fluid.Program()
startup = fluid.Program()
with fluid.program_guard(main, startup):
loss = linear_fc(3)
opt = fluid.optimizer.Adam(learning_rate=0.001)
opt.minimize(loss)
t = QuantizeTranspiler(activation_quantize_type=quant_type)
t.training_transpile(main)
self.check_program(main)
def test_linear_fc_quant_abs_max(self):
self.act_quant_op_type = 'fake_quantize_abs_max'
self.linear_fc_quant('abs_max')
def test_linear_fc_quant_range_abs_max(self):
self.act_quant_op_type = 'fake_quantize_range_abs_max'
self.linear_fc_quant('range_abs_max')
def residual_block_quant(self, quant_type):
main = fluid.Program()
startup = fluid.Program()
with fluid.program_guard(main, startup):
loss = residual_block(2)
opt = fluid.optimizer.Adam(learning_rate=0.001)
opt.minimize(loss)
t = QuantizeTranspiler(activation_quantize_type=quant_type)
t.training_transpile(main)
self.check_program(main)
def test_residual_block_abs_max(self):
self.act_quant_op_type = 'fake_quantize_abs_max'
self.residual_block_quant('abs_max')
def test_residual_block_range_abs_max(self):
self.act_quant_op_type = 'fake_quantize_range_abs_max'
self.residual_block_quant('range_abs_max')
def freeze_program(self, use_cuda, seed):
def build_program(main, startup, is_test):
main.random_seed = seed
startup.random_seed = seed
with fluid.unique_name.guard():
with fluid.program_guard(main, startup):
img = fluid.layers.data(
name='image', shape=[1, 28, 28], dtype='float32'
)
label = fluid.layers.data(
name='label', shape=[1], dtype='int64'
)
loss = conv_net(img, label)
if not is_test:
opt = fluid.optimizer.Adam(learning_rate=0.001)
opt.minimize(loss)
return [img, label], loss
main = fluid.Program()
startup = fluid.Program()
test_program = fluid.Program()
import random
random.seed(0)
np.random.seed(0)
feeds, loss = build_program(main, startup, False)
build_program(test_program, startup, True)
test_program = test_program.clone(for_test=True)
quant_type = 'range_abs_max' # 'range_abs_max' or 'abs_max'
quant_transpiler = QuantizeTranspiler(
activation_quantize_type=quant_type
)
quant_transpiler.training_transpile(main, startup)
quant_transpiler.training_transpile(test_program, startup)
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
exe = fluid.Executor(place)
iters = 5
batch_size = 8
class_num = 10
exe.run(startup)
train_reader = paddle.batch(
paddle.reader.shuffle(paddle.dataset.mnist.train(), buf_size=500),
batch_size=batch_size,
)
test_reader = paddle.batch(
paddle.dataset.mnist.test(), batch_size=batch_size
)
feeder = fluid.DataFeeder(feed_list=feeds, place=place)
with fluid.program_guard(main):
for _ in range(iters):
data = next(train_reader())
loss_v = exe.run(
program=main, feed=feeder.feed(data), fetch_list=[loss]
)
with fluid.program_guard(test_program):
test_data = next(test_reader())
w_var = fluid.framework._get_var(
'conv2d_1.w_0.quantized', test_program
)
# Testing during training
test_loss1, w_quant = exe.run(
program=test_program,
feed=feeder.feed(test_data),
fetch_list=[loss, w_var],
)
# Freeze program for inference, but the weight of fc/conv is still float type.
quant_transpiler.freeze_program(test_program, place)
(test_loss2,) = exe.run(
program=test_program,
feed=feeder.feed(test_data),
fetch_list=[loss],
)
self.assertAlmostEqual(test_loss1, test_loss2, delta=5e-3)
w_freeze = np.array(
fluid.global_scope().find_var('conv2d_1.w_0').get_tensor()
)
# fail: -432.0 != -433.0, this is due to the calculation precision
# self.assertAlmostEqual(np.sum(w_freeze), np.sum(w_quant))
# Convert parameter to 8-bit.
quant_transpiler.convert_to_int8(test_program, place)
# Save the 8-bit parameter and model file.
fluid.io.save_inference_model(
'model_8bit',
['image', 'label'],
[loss],
exe,
test_program,
clip_extra=True,
)
# Test whether the 8-bit parameter and model file can be loaded successfully.
[infer, feed, fetch] = fluid.io.load_inference_model(
'model_8bit', exe
)
# Check the loaded 8-bit weight.
w_8bit = np.array(
fluid.global_scope().find_var('conv2d_1.w_0.int8').get_tensor()
)
self.assertEqual(w_8bit.dtype, np.int8)
self.assertEqual(np.sum(w_8bit), np.sum(w_freeze))
def not_test_freeze_program_cuda(self):
if fluid.core.is_compiled_with_cuda():
with fluid.unique_name.guard():
self.freeze_program(True, seed=1)
def not_test_freeze_program_cpu(self):
with fluid.unique_name.guard():
self.freeze_program(False, seed=2)
if __name__ == '__main__':
unittest.main()
@@ -23,7 +23,7 @@ import paddle.distributed.fleet as fleet
 import paddle.fluid as fluid
 import paddle.nn as nn
 from paddle.distributed.utils.launch_utils import find_free_ports, get_cluster
-from paddle.fluid.contrib.slim.quantization import ImperativeQuantAware
+from paddle.quantization import ImperativeQuantAware
 def set_random_seed(seed, dp_id, rank_id):
...
@@ -20,10 +20,6 @@ import numpy as np
 import paddle
 import paddle.fluid as fluid
 import paddle.fluid.core as core
-from paddle.fluid.contrib.slim.quantization import (
-    QuantizationFreezePass,
-    QuantizationTransformPass,
-)
 from paddle.fluid.executor import global_scope
 from paddle.fluid.framework import (
     IrGraph,
@@ -32,6 +28,10 @@ from paddle.fluid.framework import (
     convert_np_dtype_to_dtype_,
 )
 from paddle.fluid.initializer import NumpyArrayInitializer
+from paddle.static.quantization import (
+    QuantizationFreezePass,
+    QuantizationTransformPass,
+)
 class TensorConfig:
...
@@ -21,16 +21,16 @@ import numpy as np
 import paddle
 import paddle.fluid as fluid
 from paddle.fluid import Program, Variable, core
-from paddle.fluid.contrib.slim.quantization import (
+from paddle.fluid.core import AnalysisConfig, create_paddle_predictor
+from paddle.fluid.framework import IrGraph
+from paddle.fluid.io import append_fetch_ops, prepend_feed_ops
+from paddle.static.quantization import (
     AddQuantDequantPass,
     OutScaleForInferencePass,
     OutScaleForTrainingPass,
     QuantizationFreezePass,
     QuantizationTransformPass,
 )
-from paddle.fluid.core import AnalysisConfig, create_paddle_predictor
-from paddle.fluid.framework import IrGraph
-from paddle.fluid.io import append_fetch_ops, prepend_feed_ops
 class QuantDequantTest(unittest.TestCase):
...
@@ -18,9 +18,9 @@ import paddle
 import paddle.fluid as fluid
 import paddle.fluid.layers as layers
 from paddle.fluid import core
-from paddle.fluid.contrib.slim.quantization import QuantizationTransformPass
 from paddle.fluid.framework import IrGraph, Program, program_guard
 from paddle.fluid.tests.unittests.op_test import OpTestTool
+from paddle.static.quantization import QuantizationTransformPass
 paddle.enable_static()
...
@@ -24,7 +24,7 @@ from PIL import Image
 import paddle
 import paddle.fluid as fluid
 from paddle.dataset.common import download
-from paddle.fluid.contrib.slim.quantization import PostTrainingQuantization
+from paddle.static.quantization import PostTrainingQuantization
 paddle.enable_static()
...
@@ -12,40 +12,41 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from ..fluid.contrib.slim.quantization.imperative.ptq_config import (
+from .imperative.ptq_config import (
     PTQConfig,
     default_ptq_config,
 )
-from ..fluid.contrib.slim.quantization.imperative.ptq_quantizer import (
+from .imperative.ptq_quantizer import (
     BaseQuantizer,
 )
-from ..fluid.contrib.slim.quantization.imperative.ptq_quantizer import (
+from .imperative.ptq_quantizer import (
     AbsmaxQuantizer,
 )
-from ..fluid.contrib.slim.quantization.imperative.ptq_quantizer import (
+from .imperative.ptq_quantizer import (
     PerChannelAbsmaxQuantizer,
 )
-from ..fluid.contrib.slim.quantization.imperative.ptq_quantizer import (
+from .imperative.ptq_quantizer import (
     KLQuantizer,
 )
-from ..fluid.contrib.slim.quantization.imperative.ptq_quantizer import (
+from .imperative.ptq_quantizer import (
     HistQuantizer,
 )
-from ..fluid.contrib.slim.quantization.imperative.ptq_quantizer import (
+from .imperative.ptq_quantizer import (
     SUPPORT_ACT_QUANTIZERS,
 )
-from ..fluid.contrib.slim.quantization.imperative.ptq_quantizer import (
+from .imperative.ptq_quantizer import (
     SUPPORT_WT_QUANTIZERS,
 )
-from ..fluid.contrib.slim.quantization.imperative.ptq_registry import (
+from .imperative.ptq_registry import (
     PTQRegistry,
 )
-from ..fluid.contrib.slim.quantization.imperative.ptq import ImperativePTQ
-from ..fluid.contrib.slim.quantization.imperative.qat import (
+from .imperative.ptq import (
+    ImperativePTQ,
+)
+from .imperative.qat import (
     ImperativeQuantAware,
 )
 from .config import QuantConfig
 from .base_quanter import BaseQuanter
 from .factory import quanter
...
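With the re-exports above, user code can import the PTQ helpers directly from paddle.quantization. A minimal sketch (the quantizer pairing below is illustrative and mirrors default_ptq_config):

    from paddle.quantization import (
        ImperativePTQ,
        PTQConfig,
        KLQuantizer,
        PerChannelAbsmaxQuantizer,
    )

    # KL quantizer for activations, per-channel abs-max quantizer for weights
    ptq = ImperativePTQ(PTQConfig(KLQuantizer(), PerChannelAbsmaxQuantizer()))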
-# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -13,23 +13,24 @@
 # limitations under the License.
 from . import qat
-from .qat import *
+from .qat import ImperativeQuantAware
 from . import ptq
-from .ptq import *
+from .ptq import ImperativePTQ
 from . import ptq_config
-from .ptq_config import *
+from .ptq_config import PTQConfig, default_ptq_config
 from . import ptq_quantizer
-from .ptq_quantizer import *
+from .ptq_quantizer import (
+    BaseQuantizer,
+    AbsmaxQuantizer,
+    PerChannelAbsmaxQuantizer,
+    KLQuantizer,
+    HistQuantizer,
+    SUPPORT_ACT_QUANTIZERS,
+    SUPPORT_WT_QUANTIZERS,
+)
 from . import ptq_registry
-from .ptq_registry import *
-__all__ = []
-__all__ += qat.__all__
-__all__ += ptq.__all__
-__all__ += ptq_config.__all__
-__all__ += ptq_quantizer.__all__
-__all__ += ptq_registry.__all__
+from .ptq_registry import PTQRegistry
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -13,8 +13,10 @@
 # limitations under the License.
 import copy
+import paddle
 import paddle.nn as nn
 from . import utils
@@ -66,7 +68,7 @@ def fuse_layers(model, layers_to_fuse, inplace=False):
     Return
         fused_model(paddle.nn.Layer): The fused model.
     '''
-    if inplace == False:
+    if inplace is False:
         model = copy.deepcopy(model)
     for layers in layers_to_fuse:
         _fuse_layers(model, layers)
...
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
...@@ -12,24 +12,27 @@ ...@@ -12,24 +12,27 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import logging
import copy import copy
import logging
import os import os
import numpy as np import numpy as np
import paddle import paddle
import paddle.nn.quant.quant_layers as quant_layers import paddle.nn.quant.quant_layers as quant_layers
from paddle.fluid.log_helper import get_logger
from paddle.jit.translated_layer import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX from ...static.log_helper import get_logger
from ...static.quantization.utils import (
from . import fuse_utils _get_input_name_index,
from . import utils _get_op_input_var_names,
from . import ptq_hooks _get_op_output_var_names,
from . import ptq_config _get_output_name_index,
from . import ptq_quantizer )
from . import fuse_utils, ptq_config, ptq_hooks, ptq_quantizer, utils
from .ptq_registry import PTQRegistry from .ptq_registry import PTQRegistry
__all__ = ['ImperativePTQ'] INFER_MODEL_SUFFIX = ".pdmodel"
INFER_PARAMS_SUFFIX = ".pdiparams"
_logger = get_logger( _logger = get_logger(
__name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s' __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s'
...@@ -165,8 +168,8 @@ class ImperativePTQ: ...@@ -165,8 +168,8 @@ class ImperativePTQ:
infer_program, infer_program,
feed_target_names, feed_target_names,
fetch_targets, fetch_targets,
] = paddle.fluid.io.load_inference_model( ] = paddle.static.load_inference_model(
dirname=dirname, path_prefix=dirname,
executor=exe, executor=exe,
model_filename=model_filename, model_filename=model_filename,
params_filename=params_filename, params_filename=params_filename,
...@@ -178,14 +181,23 @@ class ImperativePTQ: ...@@ -178,14 +181,23 @@ class ImperativePTQ:
self._remove_scale_op(infer_program) self._remove_scale_op(infer_program)
# Save final program # Save final program
paddle.fluid.io.save_inference_model( model_name = None
dirname=dirname, if model_filename is None:
feeded_var_names=feed_target_names, model_name = "model"
target_vars=fetch_targets, elif model_filename.endswith(".pdmodel"):
model_name = model_filename.rsplit(".", 1)[0]
else:
model_name = model_filename
path_prefix = os.path.join(dirname, model_name)
feed_vars = [
infer_program.global_block().var(name) for name in feed_target_names
]
paddle.static.save_inference_model(
path_prefix,
feed_vars,
fetch_targets,
executor=exe, executor=exe,
main_program=infer_program.clone(), program=infer_program.clone(),
model_filename=model_filename,
params_filename=params_filename,
) )
if is_dynamic_mode: if is_dynamic_mode:
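For reference, the new save path above replaces the dirname/feeded_var_names/target_vars arguments with the path-prefix style of paddle.static.save_inference_model; a small worked sketch of the prefix derivation, using hypothetical names:

    import os

    dirname = "./quant_model"                        # hypothetical output directory
    model_filename = "model.pdmodel"                 # hypothetical model file name
    model_name = model_filename.rsplit(".", 1)[0]    # -> "model"
    path_prefix = os.path.join(dirname, model_name)  # -> "./quant_model/model"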
...@@ -302,7 +314,7 @@ class ImperativePTQ: ...@@ -302,7 +314,7 @@ class ImperativePTQ:
) and PTQRegistry.is_simulated_quant_layer(sub_layer): ) and PTQRegistry.is_simulated_quant_layer(sub_layer):
quant_config = sub_layer._quant_config quant_config = sub_layer._quant_config
assert quant_config.enable_in_act_quantizer == True assert quant_config.enable_in_act_quantizer is True
wt_quantizer = quant_config.wt_quantizer wt_quantizer = quant_config.wt_quantizer
in_act_quantizer = quant_config.in_act_quantizer in_act_quantizer = quant_config.in_act_quantizer
...@@ -376,7 +388,7 @@ class ImperativePTQ: ...@@ -376,7 +388,7 @@ class ImperativePTQ:
None None
""" """
for op in utils.program_all_ops(program): for op in utils.program_all_ops(program):
for in_var_name in utils._get_op_input_var_names(op): for in_var_name in _get_op_input_var_names(op):
previous_op = utils.find_previous_op(op.block, in_var_name) previous_op = utils.find_previous_op(op.block, in_var_name)
if previous_op is None: if previous_op is None:
continue continue
...@@ -388,20 +400,16 @@ class ImperativePTQ: ...@@ -388,20 +400,16 @@ class ImperativePTQ:
attr_name = previous_op.output('OutScale')[0] attr_name = previous_op.output('OutScale')[0]
in_threshold = utils.load_variable_data(scope, attr_name) in_threshold = utils.load_variable_data(scope, attr_name)
in_threshold = utils.fp_numpy_to_naive(in_threshold) in_threshold = utils.fp_numpy_to_naive(in_threshold)
argname, index = utils._get_input_name_index( argname, index = _get_input_name_index(op, in_var_name)
op, in_var_name
)
op._set_attr( op._set_attr(
argname + str(index) + "_threshold", in_threshold argname + str(index) + "_threshold", in_threshold
) )
op._set_attr("with_quant_attr", True) op._set_attr("with_quant_attr", True)
else: else:
for out_var_name in utils._get_op_output_var_names( for out_var_name in _get_op_output_var_names(previous_op):
previous_op
):
if out_var_name != in_var_name: if out_var_name != in_var_name:
continue continue
argname, index = utils._get_output_name_index( argname, index = _get_output_name_index(
previous_op, out_var_name previous_op, out_var_name
) )
attr_name = argname + str(index) + "_threshold" attr_name = argname + str(index) + "_threshold"
...@@ -409,9 +417,7 @@ class ImperativePTQ: ...@@ -409,9 +417,7 @@ class ImperativePTQ:
continue continue
threshold = previous_op.attr(attr_name) threshold = previous_op.attr(attr_name)
argname, index = utils._get_input_name_index( argname, index = _get_input_name_index(op, in_var_name)
op, in_var_name
)
attr_name = argname + str(index) + "_threshold" attr_name = argname + str(index) + "_threshold"
op._set_attr(attr_name, threshold) op._set_attr(attr_name, threshold)
op._set_attr("with_quant_attr", True) op._set_attr("with_quant_attr", True)
...@@ -453,10 +459,10 @@ class ImperativePTQ: ...@@ -453,10 +459,10 @@ class ImperativePTQ:
continue continue
next_op = next_ops[0] next_op = next_ops[0]
argname, index = utils._get_output_name_index(op, out_var_name) argname, index = _get_output_name_index(op, out_var_name)
old_attr_name = argname + str(index) + "_threshold" old_attr_name = argname + str(index) + "_threshold"
argname, index = utils._get_output_name_index( argname, index = _get_output_name_index(
next_op, next_op.output("Out")[0] next_op, next_op.output("Out")[0]
) )
new_attr_name = argname + str(index) + "_threshold" new_attr_name = argname + str(index) + "_threshold"
...@@ -478,7 +484,7 @@ class ImperativePTQ: ...@@ -478,7 +484,7 @@ class ImperativePTQ:
@staticmethod @staticmethod
def _is_skip_layer(layer): def _is_skip_layer(layer):
return hasattr(layer, "skip_quant") and layer.skip_quant == True return hasattr(layer, "skip_quant") and layer.skip_quant is True
@staticmethod @staticmethod
def _is_quant_layer(layer): def _is_quant_layer(layer):
......
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,14 +12,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import abc
 import copy
-import paddle
-from .ptq_quantizer import *
-__all__ = ['PTQConfig', 'default_ptq_config']
+from .ptq_quantizer import (
+    SUPPORT_ACT_QUANTIZERS,
+    SUPPORT_WT_QUANTIZERS,
+    KLQuantizer,
+    PerChannelAbsmaxQuantizer,
+)
 class PTQConfig:
...
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,12 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import paddle
-import math
-import numpy as np
-from . import ptq_config
-from .ptq_registry import PTQRegistry
 def quant_forward_post_hook(layer, inputs, outputs):
     """
...
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -13,24 +13,14 @@
 # limitations under the License.
 import abc
-import copy
 import math
 import numpy as np
 import paddle
+from ...static.quantization.cal_kl_threshold import cal_kl_threshold
 from . import utils
-from ..cal_kl_threshold import cal_kl_threshold
-__all__ = [
-    'BaseQuantizer',
-    'AbsmaxQuantizer',
-    'PerChannelAbsmaxQuantizer',
-    'KLQuantizer',
-    'HistQuantizer',
-    'SUPPORT_ACT_QUANTIZERS',
-    'SUPPORT_WT_QUANTIZERS',
-]
 def abs_max_value(tensor):
...
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -14,8 +14,6 @@
 import paddle
-__all__ = ['PTQRegistry']
 class LayerInfo:
     """
...
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
...@@ -12,35 +12,27 @@ ...@@ -12,35 +12,27 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import collections
import logging
import numpy as np
import sys
import os import os
import warnings
import paddle import paddle
import paddle.nn as nn
import paddle.nn.quant.quant_layers as quant_layers import paddle.nn.quant.quant_layers as quant_layers
from paddle.fluid import dygraph, core, framework, unique_name
from paddle.fluid.framework import IrGraph from paddle.fluid.framework import IrGraph
from paddle.fluid.executor import Executor, global_scope from paddle.framework import core
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.initializer import Constant from ...static.quantization.quantization_pass import (
from paddle.jit.translated_layer import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX QuantWeightPass,
from paddle.fluid.io import load_inference_model, save_inference_model ReplaceFakeQuantDequantPass,
from ..quantization_pass import ReplaceFakeQuantDequantPass, QuantWeightPass )
from paddle.fluid.log_helper import get_logger from ...static.quantization.utils import (
from .. import quantization_pass _get_input_name_index,
from ..utils import move_persistable_var_to_global_block _get_op_input_var_names,
from . import utils _get_output_name_index,
from . import fuse_utils move_persistable_var_to_global_block,
__all__ = ['ImperativeQuantAware']
_logger = get_logger(
__name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s'
) )
from . import fuse_utils, utils
INFER_MODEL_SUFFIX = ".pdmodel"
INFER_PARAMS_SUFFIX = ".pdiparams"
def lazy_import_fleet(layer_name_map, fake_quant_input_layers): def lazy_import_fleet(layer_name_map, fake_quant_input_layers):
...@@ -147,7 +139,7 @@ class ImperativeQuantAware: ...@@ -147,7 +139,7 @@ class ImperativeQuantAware:
.. code-block:: python .. code-block:: python
import paddle import paddle
from paddle.fluid.contrib.slim.quantization \ from paddle.static.quantization \
import ImperativeQuantAware import ImperativeQuantAware
from paddle.vision.models \ from paddle.vision.models \
import resnet import resnet
...@@ -178,7 +170,7 @@ class ImperativeQuantAware: ...@@ -178,7 +170,7 @@ class ImperativeQuantAware:
.. code-block:: python .. code-block:: python
import paddle import paddle
from paddle.fluid.contrib.slim.quantization \ from paddle.static.quantization \
import ImperativeQuantAware import ImperativeQuantAware
class ImperativeModel(paddle.nn.Layer): class ImperativeModel(paddle.nn.Layer):
...@@ -256,7 +248,7 @@ class ImperativeQuantAware: ...@@ -256,7 +248,7 @@ class ImperativeQuantAware:
.. code-block:: python .. code-block:: python
import paddle import paddle
from paddle.fluid.contrib.slim.quantization \ from paddle.static.quantization \
import ImperativeQuantAware import ImperativeQuantAware
class ImperativeModel(paddle.nn.Layer): class ImperativeModel(paddle.nn.Layer):
...@@ -288,8 +280,8 @@ class ImperativeQuantAware: ...@@ -288,8 +280,8 @@ class ImperativeQuantAware:
imperative_qat.quantize(model) imperative_qat.quantize(model)
""" """
assert isinstance( assert isinstance(
model, dygraph.Layer model, paddle.nn.Layer
), "The model must be the instance of dygraph.Layer." ), "The model must be the instance of paddle.nn.Layer."
if self.fuse_conv_bn: if self.fuse_conv_bn:
fuse_utils.fuse_conv_bn(model) fuse_utils.fuse_conv_bn(model)
...@@ -376,7 +368,7 @@ class ImperativeQuantizeInputs: ...@@ -376,7 +368,7 @@ class ImperativeQuantizeInputs:
), "activation_bits should be 1, 2,... or 16." ), "activation_bits should be 1, 2,... or 16."
layer_check = lambda method: method is None or issubclass( layer_check = lambda method: method is None or issubclass(
method, dygraph.layers.Layer method, paddle.nn.Layer
) )
assert layer_check( assert layer_check(
weight_preprocess_layer weight_preprocess_layer
...@@ -417,13 +409,13 @@ class ImperativeQuantizeInputs: ...@@ -417,13 +409,13 @@ class ImperativeQuantizeInputs:
""" """
assert isinstance( assert isinstance(
model, dygraph.Layer model, paddle.nn.Layer
), "The model must be the instance of dygraph.Layer." ), "The model must be the instance of paddle.nn.Layer."
for name, cur_layer in model.named_sublayers(): for name, cur_layer in model.named_sublayers():
if not isinstance(cur_layer, self._quantizable_layer_type) or ( if not isinstance(cur_layer, self._quantizable_layer_type) or (
hasattr(cur_layer, "skip_quant") hasattr(cur_layer, "skip_quant")
and cur_layer.skip_quant == True and cur_layer.skip_quant is True
): ):
continue continue
...@@ -480,8 +472,8 @@ class ImperativeQuantizeOutputs: ...@@ -480,8 +472,8 @@ class ImperativeQuantizeOutputs:
None None
""" """
assert isinstance( assert isinstance(
model, dygraph.Layer model, paddle.nn.Layer
), "The model must be the instance of dygraph.Layer." ), "The model must be the instance of paddle.nn.Layer."
for cur_name, cur_layer in model.named_sublayers(): for cur_name, cur_layer in model.named_sublayers():
if '_act_preprocess' in cur_name: if '_act_preprocess' in cur_name:
...@@ -535,8 +527,8 @@ class ImperativeQuantizeOutputs: ...@@ -535,8 +527,8 @@ class ImperativeQuantizeOutputs:
None None
""" """
assert isinstance( assert isinstance(
model, dygraph.Layer model, paddle.nn.Layer
), "The model must be the instance of dygraph.Layer." ), "The model must be the instance of paddle.nn.Layer."
paddle.jit.save(layer=model, path=path, input_spec=input_spec, **config) paddle.jit.save(layer=model, path=path, input_spec=input_spec, **config)
...@@ -546,8 +538,8 @@ class ImperativeQuantizeOutputs: ...@@ -546,8 +538,8 @@ class ImperativeQuantizeOutputs:
paddle.enable_static() paddle.enable_static()
place = core.CPUPlace() place = core.CPUPlace()
scope = global_scope() scope = paddle.static.global_scope()
exe = Executor(place) exe = paddle.static.Executor(place)
dirname = os.path.dirname(path) dirname = os.path.dirname(path)
basename = os.path.basename(path) basename = os.path.basename(path)
...@@ -558,8 +550,8 @@ class ImperativeQuantizeOutputs: ...@@ -558,8 +550,8 @@ class ImperativeQuantizeOutputs:
infer_program, infer_program,
feed_target_names, feed_target_names,
fetch_targets, fetch_targets,
] = load_inference_model( ] = paddle.static.load_inference_model(
dirname=dirname, dirname,
executor=exe, executor=exe,
model_filename=model_filename, model_filename=model_filename,
params_filename=params_filename, params_filename=params_filename,
...@@ -600,14 +592,23 @@ class ImperativeQuantizeOutputs: ...@@ -600,14 +592,23 @@ class ImperativeQuantizeOutputs:
move_persistable_var_to_global_block(infer_program) move_persistable_var_to_global_block(infer_program)
save_inference_model( model_name = None
dirname=dirname, if model_filename is None:
feeded_var_names=feed_target_names, model_name = "model"
target_vars=fetch_targets, elif model_filename.endswith(".pdmodel"):
model_name = model_filename.rsplit(".", 1)[0]
else:
model_name = model_filename
path_prefix = os.path.join(dirname, model_name)
feed_vars = [
infer_program.global_block().var(name) for name in feed_target_names
]
paddle.static.save_inference_model(
path_prefix,
feed_vars,
fetch_targets,
executor=exe, executor=exe,
main_program=infer_program.clone(), program=infer_program.clone(),
model_filename=model_filename,
params_filename=params_filename,
clip_extra=clip_extra, clip_extra=clip_extra,
) )
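For context on the call above: `paddle.static.save_inference_model` takes a path prefix plus feed/fetch variables, replacing the old `dirname`/`feeded_var_names`/`target_vars` keywords. A minimal, self-contained sketch of that convention follows; the program, variable and path names here are illustrative only, not part of this PR.

```python
import paddle

paddle.enable_static()

exe = paddle.static.Executor(paddle.CPUPlace())
main_prog, startup_prog = paddle.static.Program(), paddle.static.Program()
with paddle.static.program_guard(main_prog, startup_prog):
    # A trivial network, only so there are feed/fetch variables to export.
    x = paddle.static.data(name='x', shape=[-1, 4], dtype='float32')
    out = paddle.static.nn.fc(x, size=2)
exe.run(startup_prog)

# "<prefix>.pdmodel" and "<prefix>.pdiparams" are written side by side.
paddle.static.save_inference_model(
    "./quant_model/model",  # path prefix, not a directory
    [x],                    # feed_vars
    [out],                  # fetch_vars
    executor=exe,
    program=main_prog,
)
```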
...@@ -619,7 +620,7 @@ class ImperativeQuantizeOutputs: ...@@ -619,7 +620,7 @@ class ImperativeQuantizeOutputs:
Whether the layer needs to calculate output scales. Whether the layer needs to calculate output scales.
""" """
# exclude fake_quant ops in quant_layers file # exclude fake_quant ops in quant_layers file
if not isinstance(layer, dygraph.Layer): if not isinstance(layer, paddle.nn.Layer):
return False return False
if self._onnx_format: if self._onnx_format:
...@@ -660,7 +661,7 @@ class ImperativeQuantizeOutputs: ...@@ -660,7 +661,7 @@ class ImperativeQuantizeOutputs:
target_ops.append(op) target_ops.append(op)
for op in target_ops: for op in target_ops:
for in_var_name in utils._get_op_input_var_names(op): for in_var_name in _get_op_input_var_names(op):
previous_op = utils.find_previous_op(op.block, in_var_name) previous_op = utils.find_previous_op(op.block, in_var_name)
if previous_op is not None and ( if previous_op is not None and (
...@@ -670,9 +671,7 @@ class ImperativeQuantizeOutputs: ...@@ -670,9 +671,7 @@ class ImperativeQuantizeOutputs:
scale_name = previous_op.output('OutScale')[0] scale_name = previous_op.output('OutScale')[0]
in_scale = utils.load_variable_data(scope, scale_name) in_scale = utils.load_variable_data(scope, scale_name)
in_scale = utils.fp_numpy_to_naive(in_scale) in_scale = utils.fp_numpy_to_naive(in_scale)
argname, index = utils._get_input_name_index( argname, index = _get_input_name_index(op, in_var_name)
op, in_var_name
)
op._set_attr( op._set_attr(
argname + str(index) + "_threshold", in_scale argname + str(index) + "_threshold", in_scale
) )
...@@ -697,7 +696,7 @@ class ImperativeQuantizeOutputs: ...@@ -697,7 +696,7 @@ class ImperativeQuantizeOutputs:
out_scale = utils.fp_numpy_to_naive(out_scale) out_scale = utils.fp_numpy_to_naive(out_scale)
if previous_op.type != "feed": if previous_op.type != "feed":
res = utils._get_output_name_index(previous_op, in_var_name) res = _get_output_name_index(previous_op, in_var_name)
if res is not None: if res is not None:
argname, index = res argname, index = res
previous_op._set_attr( previous_op._set_attr(
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
...@@ -12,19 +12,11 @@ ...@@ -12,19 +12,11 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import math
import numpy as np import numpy as np
import paddle import paddle
import paddle.nn.quant.quant_layers as quant_layers import paddle.nn.quant.quant_layers as quant_layers
from ..utils import (
_get_op_input_var_names,
_get_op_output_var_names,
_get_output_name_index,
_get_input_name_index,
)
layer_name_map = { layer_name_map = {
'Conv2DTranspose': paddle.nn.Conv2DTranspose, 'Conv2DTranspose': paddle.nn.Conv2DTranspose,
'Conv2D': paddle.nn.Conv2D, 'Conv2D': paddle.nn.Conv2D,
...@@ -42,7 +34,6 @@ layer_name_map = { ...@@ -42,7 +34,6 @@ layer_name_map = {
'Softmax': paddle.nn.Softmax, 'Softmax': paddle.nn.Softmax,
'Swish': paddle.nn.Swish, 'Swish': paddle.nn.Swish,
'Tanh': paddle.nn.Tanh, 'Tanh': paddle.nn.Tanh,
'Hardswish': paddle.nn.Hardswish,
'BatchNorm': paddle.nn.BatchNorm, 'BatchNorm': paddle.nn.BatchNorm,
'GroupNorm': paddle.nn.GroupNorm, 'GroupNorm': paddle.nn.GroupNorm,
'LayerNorm': paddle.nn.LayerNorm, 'LayerNorm': paddle.nn.LayerNorm,
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
...@@ -12,7 +12,42 @@ ...@@ -12,7 +12,42 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from . import quantize_transpiler import logging
from .quantize_transpiler import *
__all__ = quantize_transpiler.__all__
def get_logger(name, level, fmt=None):
"""
Get a logger from the logging module with the given name, level and format,
without calling logging.basicConfig; setting basicConfig inside paddle would
prevent a user's own basicConfig from taking effect after ``import paddle``.
Args:
name (str): The logger name.
level (int): The logging level of the logger, e.g. ``logging.INFO``
fmt (str): The format string for the logger output
Returns:
logging.Logger: logging logger with given settings
Examples:
.. code-block:: python
import paddle
import logging
logger = paddle.static.log_helper.get_logger(__name__, logging.INFO,
fmt='%(asctime)s-%(levelname)s: %(message)s')
"""
logger = logging.getLogger(name)
logger.setLevel(level)
handler = logging.StreamHandler()
if fmt:
formatter = logging.Formatter(fmt=fmt, datefmt='%a %b %d %H:%M:%S')
handler.setFormatter(formatter)
logger.addHandler(handler)
# Disable propagation to parent loggers, otherwise the same
# record may be printed multiple times.
logger.propagate = False
return logger
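A short usage sketch of this helper, matching how the quantization modules below construct their module-level logger (assuming the helper is importable as `paddle.static.log_helper` after this move):

```python
import logging

from paddle.static.log_helper import get_logger

_logger = get_logger(
    __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s'
)
_logger.info("calibration pass started")  # emitted once; propagation is disabled
```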
...@@ -12,50 +12,55 @@ ...@@ -12,50 +12,55 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from ...fluid.contrib.slim.quantization.quantization_pass import ( from .quantization_pass import (
QuantizationTransformPass, QuantizationTransformPass,
) )
from ...fluid.contrib.slim.quantization.quantization_pass import ( from .quantization_pass import (
QuantizationFreezePass, QuantizationFreezePass,
) )
from ...fluid.contrib.slim.quantization.quantization_pass import ( from .quantization_pass import (
ConvertToInt8Pass, ConvertToInt8Pass,
) )
from ...fluid.contrib.slim.quantization.quantization_pass import ( from .quantization_pass import (
TransformForMobilePass, TransformForMobilePass,
) )
from ...fluid.contrib.slim.quantization.quantization_pass import ( from .quantization_pass import (
OutScaleForTrainingPass, OutScaleForTrainingPass,
) )
from ...fluid.contrib.slim.quantization.quantization_pass import ( from .quantization_pass import (
OutScaleForInferencePass, OutScaleForInferencePass,
) )
from ...fluid.contrib.slim.quantization.quantization_pass import ( from .quantization_pass import (
AddQuantDequantPass, AddQuantDequantPass,
) )
from ...fluid.contrib.slim.quantization.quantization_pass import ( from .quantization_pass import (
ReplaceFakeQuantDequantPass, ReplaceFakeQuantDequantPass,
) )
from ...fluid.contrib.slim.quantization.quantization_pass import QuantWeightPass from .quantization_pass import (
from ...fluid.contrib.slim.quantization.quantization_pass import ( QuantWeightPass,
)
from .quantization_pass import (
QuantizationTransformPassV2, QuantizationTransformPassV2,
) )
from ...fluid.contrib.slim.quantization.quantization_pass import ( from .quantization_pass import (
AddQuantDequantPassV2, AddQuantDequantPassV2,
) )
from ...fluid.contrib.slim.quantization.quant_int8_mkldnn_pass import ( from .quantization_pass import (
AddQuantDequantForInferencePass,
)
from .quant_int8_mkldnn_pass import (
QuantInt8MkldnnPass, QuantInt8MkldnnPass,
) )
from ...fluid.contrib.slim.quantization.quant2_int8_mkldnn_pass import ( from .quant2_int8_mkldnn_pass import (
Quant2Int8MkldnnPass, Quant2Int8MkldnnPass,
) )
from ...fluid.contrib.slim.quantization.post_training_quantization import ( from .post_training_quantization import (
PostTrainingQuantization, PostTrainingQuantization,
) )
from ...fluid.contrib.slim.quantization.post_training_quantization import ( from .post_training_quantization import (
PostTrainingQuantizationProgram, PostTrainingQuantizationProgram,
) )
from ...fluid.contrib.slim.quantization.post_training_quantization import ( from .post_training_quantization import (
WeightQuantization, WeightQuantization,
) )
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
...@@ -12,25 +12,25 @@ ...@@ -12,25 +12,25 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import numpy as np
import time
import sys
import logging import logging
import paddle import sys
import time
import numpy as np
import paddle import paddle
import paddle.fluid as fluid import paddle.static as static
from ....log_helper import get_logger from ..log_helper import get_logger
from .utils import ( from .utils import (
_channelwise_quant_axis1_ops,
bias_correction_w,
calculate_quant_cos_error,
dequant_tensor,
load_variable_data, load_variable_data,
quant_tensor,
set_variable_data, set_variable_data,
stable_sigmoid, stable_sigmoid,
quant_tensor,
dequant_tensor,
_channelwise_quant_axis1_ops,
calculate_quant_cos_error,
bias_correction_w,
) )
_logger = get_logger( _logger = get_logger(
...@@ -42,7 +42,7 @@ ZETA = 1.1 ...@@ -42,7 +42,7 @@ ZETA = 1.1
def compute_soft_rounding(alpha_v): def compute_soft_rounding(alpha_v):
return fluid.layers.clip( return paddle.clip(
paddle.nn.functional.sigmoid(alpha_v) * (ZETA - GAMMA) + GAMMA, paddle.nn.functional.sigmoid(alpha_v) * (ZETA - GAMMA) + GAMMA,
min=0, min=0,
max=1, max=1,
...@@ -83,11 +83,9 @@ class AdaRoundLoss: ...@@ -83,11 +83,9 @@ class AdaRoundLoss:
return round_loss return round_loss
round_loss = paddle.static.nn.cond( round_loss = static.nn.cond(
warm_start, warm_start,
lambda: fluid.layers.fill_constant( lambda: paddle.full(shape=[1], dtype='float32', fill_value=0.0),
shape=[1], dtype='float32', value=0.0
),
round_loss_fn, round_loss_fn,
) )
...@@ -151,7 +149,7 @@ class AdaRound: ...@@ -151,7 +149,7 @@ class AdaRound:
shape=alpha.shape, shape=alpha.shape,
dtype="float32", dtype="float32",
name=var_name + ".alpha", name=var_name + ".alpha",
default_initializer=fluid.initializer.NumpyArrayInitializer(alpha), default_initializer=paddle.nn.initializer.Assign(alpha),
) )
def _calculate_output_with_adarounded_weights( def _calculate_output_with_adarounded_weights(
...@@ -258,12 +256,12 @@ def run_adaround( ...@@ -258,12 +256,12 @@ def run_adaround(
fetch_op_name = quant_op_out_name fetch_op_name = quant_op_out_name
# build adaround program # build adaround program
exec_strategy = fluid.ExecutionStrategy() exec_strategy = static.ExecutionStrategy()
exec_strategy.num_iteration_per_drop_scope = 1 exec_strategy.num_iteration_per_drop_scope = 1
startup_program = fluid.Program() startup_program = static.Program()
train_program = fluid.Program() train_program = static.Program()
with fluid.program_guard(train_program, startup_program): with static.program_guard(train_program, startup_program):
with fluid.unique_name.guard(): with paddle.utils.unique_name.guard():
# initialize adaround # initialize adaround
adaround = AdaRound( adaround = AdaRound(
scale, scale,
...@@ -273,21 +271,21 @@ def run_adaround( ...@@ -273,21 +271,21 @@ def run_adaround(
weight_op_type=weight_op_type, weight_op_type=weight_op_type,
num_iterations=num_iterations, num_iterations=num_iterations,
) )
orig_out_tensor = fluid.data( orig_out_tensor = static.data(
name='orig_out_tensor', name='orig_out_tensor',
shape=fp32_fetch_list.shape, shape=(-1,) + fp32_fetch_list.shape,
dtype='float32', dtype='float32',
) )
adaround_out_tensor = fluid.data( adaround_out_tensor = static.data(
name='adaround_out_tensor', name='adaround_out_tensor',
shape=fp32_fetch_list.shape, shape=(-1,) + fp32_fetch_list.shape,
dtype='float32', dtype='float32',
) )
beta_tensor = fluid.data( beta_tensor = static.data(
name='beta', shape=[1], dtype='float32' name='beta', shape=[-1, 1], dtype='float32'
) )
warm_start_tensor = fluid.data( warm_start_tensor = static.data(
name='warm_start', shape=[1], dtype='bool' name='warm_start', shape=[-1, 1], dtype='bool'
) )
train_fetches_loss = adaround.get_loss( train_fetches_loss = adaround.get_loss(
...@@ -296,7 +294,7 @@ def run_adaround( ...@@ -296,7 +294,7 @@ def run_adaround(
adaround_out_tensor, adaround_out_tensor,
orig_out_tensor, orig_out_tensor,
) )
optimizer = fluid.optimizer.Adam(learning_rate=lr) optimizer = paddle.optimizer.Adam(learning_rate=lr)
loss = train_fetches_loss['loss'] loss = train_fetches_loss['loss']
optimizer.minimize(loss) optimizer.minimize(loss)
exe.run(startup_program) exe.run(startup_program)
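As a standalone reference for the `paddle.static` idioms this block now uses (program guards, `static.data` with a -1 batch dimension, and `paddle.optimizer.Adam` in a static program), here is a minimal hedged sketch; the network, names and shapes are placeholders, not the AdaRound graph itself.

```python
import numpy as np
import paddle
import paddle.static as static

paddle.enable_static()

startup_program = static.Program()
train_program = static.Program()
with static.program_guard(train_program, startup_program):
    with paddle.utils.unique_name.guard():
        # -1 marks a variable batch dimension, as in the tensors defined above.
        x = static.data(name='x', shape=[-1, 8], dtype='float32')
        y = static.data(name='y', shape=[-1, 1], dtype='float32')
        pred = static.nn.fc(x, size=1)
        loss = paddle.mean(paddle.nn.functional.square_error_cost(pred, y))
        paddle.optimizer.Adam(learning_rate=1e-3).minimize(loss)

exe = static.Executor(paddle.CPUPlace())
exe.run(startup_program)
(loss_val,) = exe.run(
    train_program,
    feed={
        'x': np.random.rand(4, 8).astype('float32'),
        'y': np.random.rand(4, 1).astype('float32'),
    },
    fetch_list=[loss],
)
```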
......
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
...@@ -14,15 +14,15 @@ ...@@ -14,15 +14,15 @@
import logging import logging
import math import math
import numpy as np import numpy as np
from ....log_helper import get_logger
from ..log_helper import get_logger
_logger = get_logger( _logger = get_logger(
__name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s' __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s'
) )
__all__ = ['cal_kl_threshold']
def expand_quantized_bins(quantized_bins, reference_bins): def expand_quantized_bins(quantized_bins, reference_bins):
''' '''
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
...@@ -12,43 +12,37 @@ ...@@ -12,43 +12,37 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import logging
import os import os
import re
import math
import shutil import shutil
import logging
import numpy as np import numpy as np
try: try:
from tqdm import tqdm from tqdm import tqdm
except: except:
from .utils import tqdm from .utils import tqdm
from inspect import isgeneratorfunction from inspect import isgeneratorfunction
from .... import io
from .... import core from paddle.fluid.framework import IrGraph, _get_var
from .... import reader
from .... import framework from ... import io, static
from .... import unique_name from ...fluid import reader
from ....executor import global_scope, Executor from ...framework import core
from ....framework import IrGraph from ...utils import unique_name
from ....log_helper import get_logger from ..log_helper import get_logger
from . import utils
from .adaround import run_adaround
from .cal_kl_threshold import cal_kl_threshold
from .quantization_pass import ( from .quantization_pass import (
AddQuantDequantPass,
AddQuantDequantPassV2,
QuantizationFreezePass,
QuantizationTransformPass, QuantizationTransformPass,
QuantizationTransformPassV2, QuantizationTransformPassV2,
QuantizationFreezePass,
QuantWeightPass, QuantWeightPass,
AddQuantDequantPass,
AddQuantDequantPassV2,
) )
from .cal_kl_threshold import cal_kl_threshold
from .adaround import run_adaround
from . import utils
__all__ = [
'PostTrainingQuantization',
'WeightQuantization',
'PostTrainingQuantizationProgram',
]
_logger = get_logger( _logger = get_logger(
__name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s' __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s'
...@@ -156,10 +150,10 @@ class PostTrainingQuantization: ...@@ -156,10 +150,10 @@ class PostTrainingQuantization:
Constructor. Constructor.
Args: Args:
executor(fluid.Executor): The executor to load, run and save the executor(static.Executor): The executor to load, run and save the
quantized model. quantized model.
scope(fluid.Scope, optional): The scope of the program, use it to load scope(static.Scope, optional): The scope of the program, use it to load
and save variables. If scope=None, get scope by global_scope(). and save variables. If scope=None, get scope by static.global_scope().
model_dir(str): The path of the fp32 model that will be quantized, model_dir(str): The path of the fp32 model that will be quantized,
and the model and params files are under the path. and the model and params files are under the path.
model_filename(str, optional): The name of file to load the inference model_filename(str, optional): The name of file to load the inference
...@@ -245,10 +239,10 @@ class PostTrainingQuantization: ...@@ -245,10 +239,10 @@ class PostTrainingQuantization:
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle.fluid as fluid import paddle.static as static
from paddle.fluid.contrib.slim.quantization import PostTrainingQuantization from paddle.static.quantization import PostTrainingQuantization
exe = fluid.Executor(fluid.CPUPlace()) exe = static.Executor(paddle.CPUPlace())
model_dir = path/to/fp32_model_params model_dir = path/to/fp32_model_params
# set model_filename as None when the filename is __model__, # set model_filename as None when the filename is __model__,
# otherwise set it as the real filename # otherwise set it as the real filename
...@@ -344,7 +338,7 @@ class PostTrainingQuantization: ...@@ -344,7 +338,7 @@ class PostTrainingQuantization:
# Save input params # Save input params
self._bias_correction = bias_correction self._bias_correction = bias_correction
self._executor = executor self._executor = executor
self._scope = global_scope() if scope is None else scope self._scope = static.global_scope() if scope is None else scope
self._model_dir = model_dir self._model_dir = model_dir
self._model_filename = model_filename self._model_filename = model_filename
self._params_filename = params_filename self._params_filename = params_filename
...@@ -537,22 +531,29 @@ class PostTrainingQuantization: ...@@ -537,22 +531,29 @@ class PostTrainingQuantization:
Args: Args:
save_model_path(str): The path to save the quantized model. save_model_path(str): The path to save the quantized model.
model_filename(str, optional): If the model_filename is None, model_filename(str, optional): If the model_filename is None,
save the model to '__model__'. Otherwise, save the model save the model to 'model.pdmodel' and 'model.pdiparams'. Otherwise, save the model to 'model_name.pdmodel' and
to the specified filename. Default: None. 'model_name.pdiparams'. Default: None.
params_filename(str, optional): If the params_filename is None,
save params to separted files. Otherwise, save all params
to the specified filename.
Returns: Returns:
None None
''' '''
io.save_inference_model( model_name = None
dirname=save_model_path, if model_filename is None:
model_filename=model_filename, model_name = "model"
params_filename=params_filename, elif model_filename.endswith(".pdmodel"):
feeded_var_names=self._feed_list, model_name = model_filename.rsplit(".", 1)[0]
target_vars=self._fetch_list, else:
model_name = model_filename
path_prefix = os.path.join(save_model_path, model_name)
feed_vars = [
self._program.global_block().var(name) for name in self._feed_list
]
static.save_inference_model(
path_prefix,
feed_vars,
self._fetch_list,
executor=self._executor, executor=self._executor,
main_program=self._program, program=self._program,
clip_extra=self._clip_extra, clip_extra=self._clip_extra,
) )
_logger.info("The quantized model is saved in " + save_model_path) _logger.info("The quantized model is saved in " + save_model_path)
...@@ -567,8 +568,8 @@ class PostTrainingQuantization: ...@@ -567,8 +568,8 @@ class PostTrainingQuantization:
self._program, self._program,
self._feed_list, self._feed_list,
self._fetch_list, self._fetch_list,
] = io.load_inference_model( ] = static.load_inference_model(
dirname=self._model_dir, self._model_dir,
executor=self._executor, executor=self._executor,
model_filename=self._model_filename, model_filename=self._model_filename,
params_filename=self._params_filename, params_filename=self._params_filename,
...@@ -578,7 +579,7 @@ class PostTrainingQuantization: ...@@ -578,7 +579,7 @@ class PostTrainingQuantization:
self._optimize_fp32_model() self._optimize_fp32_model()
feed_vars = [ feed_vars = [
framework._get_var(str(var_name), self._program) _get_var(str(var_name), self._program)
for var_name in self._feed_list for var_name in self._feed_list
] ]
...@@ -1632,17 +1633,17 @@ class WeightQuantization: ...@@ -1632,17 +1633,17 @@ class WeightQuantization:
# Load model # Load model
place = core.CPUPlace() place = core.CPUPlace()
exe = Executor(place) exe = static.Executor(place)
scope = global_scope() scope = static.global_scope()
[infer_program, feed_list, fetch_list] = io.load_inference_model( [infer_program, feed_list, fetch_list] = static.load_inference_model(
dirname=self._model_dir, self._model_dir,
executor=exe, executor=exe,
model_filename=self._model_filename, model_filename=self._model_filename,
params_filename=self._params_filename, params_filename=self._params_filename,
) )
# Clone and save fp16 weights # Clone and save fp16 weights
save_program = framework.Program() save_program = static.Program()
save_block = save_program.global_block() save_block = save_program.global_block()
save_var_map = {} save_var_map = {}
...@@ -1723,10 +1724,10 @@ class WeightQuantization: ...@@ -1723,10 +1724,10 @@ class WeightQuantization:
""" """
# Load model # Load model
place = core.CPUPlace() place = core.CPUPlace()
exe = Executor(place) exe = static.Executor(place)
scope = global_scope() scope = static.global_scope()
[program, feed_list, fetch_list] = io.load_inference_model( [program, feed_list, fetch_list] = static.load_inference_model(
dirname=self._model_dir, self._model_dir,
executor=exe, executor=exe,
model_filename=self._model_filename, model_filename=self._model_filename,
params_filename=self._params_filename, params_filename=self._params_filename,
...@@ -1758,15 +1759,22 @@ class WeightQuantization: ...@@ -1758,15 +1759,22 @@ class WeightQuantization:
self._weight_channel_wise_abs_max_quantization( self._weight_channel_wise_abs_max_quantization(
scope, place, weight_bits, op, var_name, for_test scope, place, weight_bits, op, var_name, for_test
) )
model_name = None
io.save_inference_model( if save_model_filename is None:
dirname=save_model_dir, model_name = "model"
feeded_var_names=feed_list, elif save_model_filename.endswith(".pdmodel"):
target_vars=fetch_list, model_name = save_model_filename.rsplit(".", 1)[0]
else:
model_name = save_model_filename
path_prefix = os.path.join(save_model_dir, model_name)
feed_vars = [program.global_block().var(name) for name in feed_list]
static.save_inference_model(
path_prefix,
feed_vars,
fetch_list,
executor=exe, executor=exe,
main_program=program, program=program,
model_filename=save_model_filename,
params_filename=save_params_filename,
) )
def _weight_abs_max_quantization( def _weight_abs_max_quantization(
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
...@@ -13,11 +13,9 @@ ...@@ -13,11 +13,9 @@
# limitations under the License. # limitations under the License.
import numpy as np import numpy as np
from .... import core
from ....framework import IrGraph
from ....framework import _get_paddle_place
__all__ = ['Quant2Int8MkldnnPass'] from ...fluid.framework import IrGraph
from ...framework import _get_paddle_place, core
OpRole = core.op_proto_and_checker_maker.OpRole OpRole = core.op_proto_and_checker_maker.OpRole
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
...@@ -13,12 +13,9 @@ ...@@ -13,12 +13,9 @@
# limitations under the License. # limitations under the License.
import numpy as np import numpy as np
from .... import core
from ....framework import IrGraph
from ....framework import IrNode
from ....framework import _get_paddle_place
__all__ = ['QuantInt8MkldnnPass'] from ...fluid.framework import IrGraph
from ...framework import _get_paddle_place
class QuantInt8MkldnnPass: class QuantInt8MkldnnPass:
...@@ -40,23 +37,23 @@ class QuantInt8MkldnnPass: ...@@ -40,23 +37,23 @@ class QuantInt8MkldnnPass:
def __init__(self, _scope=None, _place=None): def __init__(self, _scope=None, _place=None):
r""" r"""
Args: Args:
scope(fluid.Scope): scope is used to initialize the new parameters. scope(static.Scope): scope is used to initialize the new parameters.
place(fluid.CPUPlace|str): place is used to initialize the new parameters. place(static.CPUPlace|str): place is used to initialize the new parameters.
When it is string, it can be only 'cpu'. When it is string, it can be only 'cpu'.
Examples: Examples:
.. code-block:: python .. code-block:: python
# The original graph will be rewritten. # The original graph will be rewritten.
import paddle.fluid as fluid import paddle.static as static
from paddle.fluid.contrib.slim.quantization \ from paddle.static.quantization \
import QuantInt8MkldnnPass import QuantInt8MkldnnPass
from paddle.fluid.framework import IrGraph from paddle.fluid.framework import IrGraph
from paddle.fluid import core from paddle.framework import core
graph = IrGraph(core.Graph(fluid.Program().desc), for_test=False) graph = IrGraph(core.Graph(static.Program().desc), for_test=False)
place = fluid.CPUPlace() place = static.CPUPlace()
mkldnn_pass = QuantInt8MkldnnPass(fluid.global_scope(), mkldnn_pass = QuantInt8MkldnnPass(static.global_scope(),
place) place)
mkldnn_pass.apply(graph) mkldnn_pass.apply(graph)
""" """
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
...@@ -13,39 +13,21 @@ ...@@ -13,39 +13,21 @@
# limitations under the License. # limitations under the License.
import collections import collections
import numpy as np import numpy as np
try: try:
from tqdm import tqdm from tqdm import tqdm
except: except:
from .utils import tqdm from .utils import tqdm
from .... import core
from ....framework import IrGraph
from ....framework import IrNode
from ....framework import Operator
from .... import unique_name
from ....framework import Program, program_guard, default_startup_program
from ....data import data
from ....executor import scope_guard
from ....framework import _get_paddle_place
from . import utils
import paddle import paddle
__all__ = [ from ...fluid.framework import IrGraph, IrNode
'QuantizationTransformPass', from ...framework import _get_paddle_place, core
'QuantizationFreezePass', from ...static import Program, data, program_guard, scope_guard
'ConvertToInt8Pass', from ...utils import unique_name
'TransformForMobilePass', from . import utils
'OutScaleForTrainingPass',
'OutScaleForInferencePass',
'AddQuantDequantPass',
'QuantizationTransformPassV2',
'AddQuantDequantPassV2',
'ReplaceFakeQuantDequantPass',
'QuantWeightPass',
'AddQuantDequantForInferencePass',
]
_fake_quant_op_list = [ _fake_quant_op_list = [
'fake_quantize_abs_max', 'fake_quantize_abs_max',
...@@ -137,10 +119,10 @@ class QuantizationTransformPass: ...@@ -137,10 +119,10 @@ class QuantizationTransformPass:
Constructor. Constructor.
Args: Args:
scope(fluid.Scope): When activation use 'range_abs_max' as the quantize scope(static.Scope): When activation use 'range_abs_max' as the quantize
type, this pass will create some new parameters. The scope is used to type, this pass will create some new parameters. The scope is used to
initialize these new parameters. initialize these new parameters.
place(fluid.CPUPlace|fluid.CUDAPlace|str): place is used to initialize new place(static.CPUPlace|static.CUDAPlace|str): place is used to initialize new
parameters described above. If it's a string, it can be ``cpu`` or ``gpu:x``, parameters described above. If it's a string, it can be ``cpu`` or ``gpu:x``,
where ``x`` is the index of the GPUs. where ``x`` is the index of the GPUs.
weight_bits(int): quantization bit number for weights, weight_bits(int): quantization bit number for weights,
...@@ -197,15 +179,15 @@ class QuantizationTransformPass: ...@@ -197,15 +179,15 @@ class QuantizationTransformPass:
Examples: Examples:
.. code-block:: python .. code-block:: python
# The original graph will be rewritten. # The original graph will be rewritten.
import paddle.fluid as fluid import paddle.static as static
from paddle.fluid.contrib.slim.quantization \ from paddle.static.quantization \
import QuantizationTransformPass import QuantizationTransformPass
from paddle.fluid.contrib.slim.graph import IrGraph from paddle.fluid.framework import IrGraph
from paddle.fluid import core from paddle.framework import core
graph = IrGraph(core.Graph(program.desc), for_test=False) graph = IrGraph(core.Graph(static.Program().desc), for_test=False)
place = fluid.CPUPlace() place = paddle.CPUPlace()
transform_pass = QuantizationTransformPass(fluid.global_scope(), transform_pass = QuantizationTransformPass(static.global_scope(),
place) place)
transform_pass.apply(graph) transform_pass.apply(graph)
""" """
...@@ -1094,8 +1076,8 @@ class QuantizationFreezePass: ...@@ -1094,8 +1076,8 @@ class QuantizationFreezePass:
and weight will be scaled offline. and weight will be scaled offline.
Args: Args:
scope(fluid.Scope): scope is used to get the weight tensor values. scope(static.Scope): scope is used to get the weight tensor values.
place(fluid.CPUPlace|fluid.CUDAPlace|str): place is used to restore the weight tensors. place(static.CPUPlace|static.CUDAPlace|str): place is used to restore the weight tensors.
If it's a string, it can be ``cpu`` or ``gpu:x``, where ``x`` is the index of the GPUs. If it's a string, it can be ``cpu`` or ``gpu:x``, where ``x`` is the index of the GPUs.
bias_correction(bool): whether use bias correction for post-training quantization. bias_correction(bool): whether use bias correction for post-training quantization.
https://arxiv.org/abs/1810.05723. https://arxiv.org/abs/1810.05723.
...@@ -1190,7 +1172,7 @@ class QuantizationFreezePass: ...@@ -1190,7 +1172,7 @@ class QuantizationFreezePass:
) )
quantized_param_v = np.round(quantized_param_v) quantized_param_v = np.round(quantized_param_v)
# Weight bias correction # Weight bias correction
if self._bias_correction == True: if self._bias_correction is True:
quantized_param_v = utils.bias_correction_w( quantized_param_v = utils.bias_correction_w(
param_v, param_v,
quantized_param_v, quantized_param_v,
...@@ -1459,8 +1441,8 @@ class ConvertToInt8Pass: ...@@ -1459,8 +1441,8 @@ class ConvertToInt8Pass:
Convert the weights into int8_t type. Convert the weights into int8_t type.
Args: Args:
scope(fluid.Scope): scope is used to get the weight tensor values. scope(static.Scope): scope is used to get the weight tensor values.
place(fluid.CPUPlace|fluid.CUDAPlace|str): place is used to restore the place(static.CPUPlace|static.CUDAPlace|str): place is used to restore the
8-bit weight tensors. If it's a string, it can be ``cpu`` or ``gpu:x``, 8-bit weight tensors. If it's a string, it can be ``cpu`` or ``gpu:x``,
where ``x`` is the index of the GPUs. where ``x`` is the index of the GPUs.
quantizable_op_type(list[str]): This input param will be removed later. The pass quantizable_op_type(list[str]): This input param will be removed later. The pass
...@@ -1602,8 +1584,8 @@ class OutScaleForTrainingPass: ...@@ -1602,8 +1584,8 @@ class OutScaleForTrainingPass:
These output scales may be used by tensorRT or some other inference engines. These output scales may be used by tensorRT or some other inference engines.
Args: Args:
scope(fluid.Scope): The scope is used to initialize these new parameters. scope(static.Scope): The scope is used to initialize these new parameters.
place(fluid.CPUPlace|fluid.CUDAPlace|str): The place is used to initialize new parameters. place(static.CPUPlace|static.CUDAPlace|str): The place is used to initialize new parameters.
If it's a string, it can be ``cpu`` or ``gpu:x``, where ``x`` is the If it's a string, it can be ``cpu`` or ``gpu:x``, where ``x`` is the
index of the GPUs. index of the GPUs.
moving_rate(float): The decay coefficient of moving average. The default value is 0.9. moving_rate(float): The decay coefficient of moving average. The default value is 0.9.
...@@ -1764,7 +1746,7 @@ class OutScaleForInferencePass: ...@@ -1764,7 +1746,7 @@ class OutScaleForInferencePass:
These output scales may be used by tensorRT or some other inference engines. These output scales may be used by tensorRT or some other inference engines.
Args: Args:
scope(fluid.Scope): The scope is used to initialize these new parameters. scope(static.Scope): The scope is used to initialize these new parameters.
""" """
self._scope = scope self._scope = scope
self._teller_set = utils.QUANT_SUPPORTED_OP_TYPE_LIST self._teller_set = utils.QUANT_SUPPORTED_OP_TYPE_LIST
...@@ -1856,8 +1838,8 @@ class AddQuantDequantPass: ...@@ -1856,8 +1838,8 @@ class AddQuantDequantPass:
Constructor. Constructor.
Args: Args:
scope(fluid.Scope): The scope is used to initialize these new parameters. scope(static.Scope): The scope is used to initialize these new parameters.
place(fluid.CPUPlace|fluid.CUDAPlace|str): place is used to initialize new place(static.CPUPlace|static.CUDAPlace|str): place is used to initialize new
parameters described above. If ``place`` is a string, it can be ``cpu`` parameters described above. If ``place`` is a string, it can be ``cpu``
or ``gpu:x``, where ``x`` is the index of the GPUs. or ``gpu:x``, where ``x`` is the index of the GPUs.
moving_rate(float, optional): the param for 'quant_dequant_moving_average_abs_max' moving_rate(float, optional): the param for 'quant_dequant_moving_average_abs_max'
...@@ -2452,12 +2434,12 @@ class QuantizationTransformPassV2(QuantizationTransformPass): ...@@ -2452,12 +2434,12 @@ class QuantizationTransformPassV2(QuantizationTransformPass):
.. code-block:: python .. code-block:: python
# The original graph will be rewritten. # The original graph will be rewritten.
import paddle import paddle
from paddle.fluid.contrib.slim.quantization \ from paddle.static.quantization \
import QuantizationTransformPassV2 import QuantizationTransformPassV2
from paddle.fluid.contrib.slim.graph import IrGraph from paddle.fluid.framework import IrGraph
from paddle.fluid import core from paddle.framework import core
graph = IrGraph(core.Graph(program.desc), for_test=False) graph = IrGraph(core.Graph(static.Program().desc), for_test=False)
place = paddle.CPUPlace() place = paddle.CPUPlace()
scope = paddle.static.global_scope() scope = paddle.static.global_scope()
transform_pass = QuantizationTransformPassV2(scope, place) transform_pass = QuantizationTransformPassV2(scope, place)
...@@ -2810,12 +2792,12 @@ class AddQuantDequantPassV2: ...@@ -2810,12 +2792,12 @@ class AddQuantDequantPassV2:
.. code-block:: python .. code-block:: python
# The original graph will be rewritten. # The original graph will be rewritten.
import paddle import paddle
from paddle.fluid.contrib.slim.quantization \ from paddle.static.quantization \
import AddQuantDequantPassV2 import AddQuantDequantPassV2
from paddle.fluid.contrib.slim.graph import IrGraph from paddle.fluid.framework import IrGraph
from paddle.fluid import core from paddle.framework import core
graph = IrGraph(core.Graph(program.desc), for_test=False) graph = IrGraph(core.Graph(static.Program().desc), for_test=False)
place = paddle.CPUPlace() place = paddle.CPUPlace()
scope = paddle.static.global_scope() scope = paddle.static.global_scope()
add_quant_dequant_pass = AddQuantDequantPassV2(scope, place) add_quant_dequant_pass = AddQuantDequantPassV2(scope, place)
...@@ -2977,12 +2959,12 @@ class ReplaceFakeQuantDequantPass: ...@@ -2977,12 +2959,12 @@ class ReplaceFakeQuantDequantPass:
.. code-block:: python .. code-block:: python
# The original graph will be rewritten. # The original graph will be rewritten.
import paddle import paddle
from paddle.fluid.contrib.slim.quantization \ from paddle.static.quantization \
import ReplaceFakeQuantDequantPass import ReplaceFakeQuantDequantPass
from paddle.fluid.contrib.slim.graph import IrGraph from paddle.fluid.framework import IrGraph
from paddle.fluid import core from paddle.framework import core
graph = IrGraph(core.Graph(program.desc), for_test=False) graph = IrGraph(core.Graph(static.Program().desc), for_test=False)
place = paddle.CPUPlace() place = paddle.CPUPlace()
scope = paddle.static.global_scope() scope = paddle.static.global_scope()
replace_pass = ReplaceFakeQuantDequantPass(scope, place) replace_pass = ReplaceFakeQuantDequantPass(scope, place)
...@@ -3133,12 +3115,12 @@ class QuantWeightPass: ...@@ -3133,12 +3115,12 @@ class QuantWeightPass:
.. code-block:: python .. code-block:: python
# The original graph will be rewritten. # The original graph will be rewritten.
import paddle import paddle
from paddle.fluid.contrib.slim.quantization \ from paddle.static.quantization \
import QuantWeightPass import QuantWeightPass
from paddle.fluid.contrib.slim.graph import IrGraph from paddle.fluid.framework import IrGraph
from paddle.fluid import core from paddle.framework import core
graph = IrGraph(core.Graph(program.desc), for_test=False) graph = IrGraph(core.Graph(paddle.static.Program().desc), for_test=False)
place = paddle.CPUPlace() place = paddle.CPUPlace()
scope = paddle.static.global_scope() scope = paddle.static.global_scope()
quant_weight_pass = QuantWeightPass(scope, place) quant_weight_pass = QuantWeightPass(scope, place)
...@@ -3207,7 +3189,7 @@ class QuantWeightPass: ...@@ -3207,7 +3189,7 @@ class QuantWeightPass:
bits_length, bits_length,
onnx_format=True, onnx_format=True,
) )
if self._bias_correction == True: if self._bias_correction is True:
quantized_param_v = utils.bias_correction_w( quantized_param_v = utils.bias_correction_w(
param_v, param_v,
quantized_param_v, quantized_param_v,
...@@ -3264,7 +3246,7 @@ class AddQuantDequantForInferencePass: ...@@ -3264,7 +3246,7 @@ class AddQuantDequantForInferencePass:
def __init__(self, scope, place, quant_bits=8): def __init__(self, scope, place, quant_bits=8):
""" """
Args: Args:
scope(fluid.Scope): The scope is used to initialize these new parameters. scope(static.Scope): The scope is used to initialize these new parameters.
place(paddle.CPUPlace|paddle.CUDAPlace|str): place is used to restore the weight tensors. place(paddle.CPUPlace|paddle.CUDAPlace|str): place is used to restore the weight tensors.
If it's string, it can be ``cpu``, and ``gpu:x``, where ``x`` is the index of the GPUs. If it's string, it can be ``cpu``, and ``gpu:x``, where ``x`` is the index of the GPUs.
quant_bits(int, optional): quantization bit number for weight. Default is 8. quant_bits(int, optional): quantization bit number for weight. Default is 8.
......
...@@ -250,7 +250,6 @@ if(WIN32) ...@@ -250,7 +250,6 @@ if(WIN32)
list(REMOVE_ITEM TEST_OPS test_post_training_quantization_lstm_model) list(REMOVE_ITEM TEST_OPS test_post_training_quantization_lstm_model)
list(REMOVE_ITEM TEST_OPS test_imperative_ptq) list(REMOVE_ITEM TEST_OPS test_imperative_ptq)
list(REMOVE_ITEM TEST_OPS test_weight_quantization_mobilenetv1) list(REMOVE_ITEM TEST_OPS test_weight_quantization_mobilenetv1)
list(REMOVE_ITEM TEST_OPS test_quantize_transpiler_v2)
list(REMOVE_ITEM TEST_OPS test_imperative_qat_amp) list(REMOVE_ITEM TEST_OPS test_imperative_qat_amp)
list(REMOVE_ITEM TEST_OPS test_imperative_qat_lsq) list(REMOVE_ITEM TEST_OPS test_imperative_qat_lsq)
list(REMOVE_ITEM TEST_OPS test_imperative_qat_matmul) list(REMOVE_ITEM TEST_OPS test_imperative_qat_matmul)
......
...@@ -91,17 +91,18 @@ Having gathered all the data needed for quantization we apply the `cpu_quantize_ ...@@ -91,17 +91,18 @@ Having gathered all the data needed for quantization we apply the `cpu_quantize_
The code snippet shows how the `Quant2Int8MkldnnPass` can be applied to a model graph: The code snippet shows how the `Quant2Int8MkldnnPass` can be applied to a model graph:
```python ```python
import paddle.fluid as fluid import paddle
from paddle.fluid.contrib.slim.quantization import Quant2Int8MkldnnPass import paddle.static as static
from paddle.static.quantization import Quant2Int8MkldnnPass
from paddle.fluid.framework import IrGraph from paddle.fluid.framework import IrGraph
from paddle.fluid import core from paddle.framework import core
# Create the IrGraph by Program # Create the IrGraph by Program
graph = IrGraph(core.Graph(fluid.Program().desc), for_test=False) graph = IrGraph(core.Graph(static.Program().desc), for_test=False)
place = fluid.CPUPlace() place = paddle.CPUPlace()
# Convert the IrGraph to MKL-DNN supported INT8 IrGraph using the # Convert the IrGraph to MKL-DNN supported INT8 IrGraph using the
# Quant2Int8MkldnnPass. It requires a list of operators to be quantized # Quant2Int8MkldnnPass. It requires a list of operators to be quantized
mkldnn_pass = Quant2Int8MkldnnPass({'conv2d', 'pool2d'}, fluid.global_scope(), place, fluid.core, False) mkldnn_pass = Quant2Int8MkldnnPass({'conv2d', 'pool2d'}, static.global_scope(), place, core, False)
# Apply Quant2Int8MkldnnPass to IrGraph # Apply Quant2Int8MkldnnPass to IrGraph
mkldnn_pass.apply(graph) mkldnn_pass.apply(graph)
...@@ -263,7 +264,7 @@ The following options are also accepted: ...@@ -263,7 +264,7 @@ The following options are also accepted:
```bash ```bash
cd /PATH/TO/PADDLE cd /PATH/TO/PADDLE
OMP_NUM_THREADS=28 FLAGS_use_mkldnn=true python python/paddle/fluid/contrib/slim/tests/quant2_int8_image_classification_comparison.py --quant_model=/PATH/TO/DOWNLOADED/QUANT/MODEL --fp32_model=/PATH/TO/DOWNLOADED/FP32/MODEL --infer_data=$HOME/.cache/paddle/dataset/int8/download/int8_full_val.bin --batch_size=50 --batch_num=1000 --acc_diff_threshold=0.01 --ops_to_quantize="conv2d,pool2d" OMP_NUM_THREADS=28 FLAGS_use_mkldnn=true python python/paddle/static/quantization/slim/tests/quant2_int8_image_classification_comparison.py --quant_model=/PATH/TO/DOWNLOADED/QUANT/MODEL --fp32_model=/PATH/TO/DOWNLOADED/FP32/MODEL --infer_data=$HOME/.cache/paddle/dataset/int8/download/int8_full_val.bin --batch_size=50 --batch_num=1000 --acc_diff_threshold=0.01 --ops_to_quantize="conv2d,pool2d"
``` ```
> Notes: Due to the large number of images in the `int8_full_val.bin` dataset (50 000), the accuracy benchmark may take a long time. To speed up accuracy measurement, it is recommended to set `OMP_NUM_THREADS` to the maximum number of physical cores available on the server. > Notes: Due to the large number of images in the `int8_full_val.bin` dataset (50 000), the accuracy benchmark may take a long time. To speed up accuracy measurement, it is recommended to set `OMP_NUM_THREADS` to the maximum number of physical cores available on the server.
...@@ -276,7 +277,7 @@ To reproduce the performance results, the environment variable `OMP_NUM_THREADS= ...@@ -276,7 +277,7 @@ To reproduce the performance results, the environment variable `OMP_NUM_THREADS=
```bash ```bash
cd /PATH/TO/PADDLE/build cd /PATH/TO/PADDLE/build
python ../python/paddle/fluid/contrib/slim/tests/save_quant_model.py --quant_model_path=/PATH/TO/DOWNLOADED/QUANT/MODEL --int8_model_save_path=/PATH/TO/SAVE/QUANT/INT8/MODEL --ops_to_quantize="conv2d,pool2d" python ../python/paddle/static/quantization/slim/tests/save_quant_model.py --quant_model_path=/PATH/TO/DOWNLOADED/QUANT/MODEL --int8_model_save_path=/PATH/TO/SAVE/QUANT/INT8/MODEL --ops_to_quantize="conv2d,pool2d"
``` ```
2. Run the C-API test for performance benchmark. 2. Run the C-API test for performance benchmark.
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
......
# copyright (c) 2020 paddlepaddle authors. all rights reserved. # copyright (c) 2022 paddlepaddle authors. all rights reserved.
# #
# licensed under the apache license, version 2.0 (the "license"); # licensed under the apache license, version 2.0 (the "license");
# you may not use this file except in compliance with the license. # you may not use this file except in compliance with the license.
...@@ -12,14 +12,14 @@ ...@@ -12,14 +12,14 @@
# see the license for the specific language governing permissions and # see the license for the specific language governing permissions and
# limitations under the license. # limitations under the license.
import unittest import argparse
import os import os
import sys import sys
import argparse import unittest
import paddle.fluid as fluid
from paddle.fluid.framework import IrGraph
from paddle.fluid import core
import paddle import paddle
from paddle.fluid.framework import IrGraph
from paddle.framework import core
paddle.enable_static() paddle.enable_static()
...@@ -47,29 +47,32 @@ def parse_args(): ...@@ -47,29 +47,32 @@ def parse_args():
def generate_dot_for_model(model_path, save_graph_dir, save_graph_name): def generate_dot_for_model(model_path, save_graph_dir, save_graph_name):
place = fluid.CPUPlace() place = paddle.CPUPlace()
exe = fluid.Executor(place) exe = paddle.static.Executor(place)
inference_scope = fluid.executor.global_scope() inference_scope = paddle.static.global_scope()
with fluid.scope_guard(inference_scope): with paddle.static.scope_guard(inference_scope):
if os.path.exists(os.path.join(model_path, '__model__')): if os.path.exists(os.path.join(model_path, '__model__')):
[ [
inference_program, inference_program,
feed_target_names, feed_target_names,
fetch_targets, fetch_targets,
] = fluid.io.load_inference_model(model_path, exe) ] = paddle.fluid.io.load_inference_model(model_path, exe)
else: else:
[ [
inference_program, inference_program,
feed_target_names, feed_target_names,
fetch_targets, fetch_targets,
] = fluid.io.load_inference_model( ] = paddle.static.load_inference_model(
model_path, exe, 'model', 'params' model_path,
exe,
model_filename='model',
params_filename='params',
) )
graph = IrGraph(core.Graph(inference_program.desc), for_test=True) graph = IrGraph(core.Graph(inference_program.desc), for_test=True)
if not os.path.exists(save_graph_dir): if not os.path.exists(save_graph_dir):
os.makedirs(save_graph_dir) os.makedirs(save_graph_dir)
model_name = os.path.basename(os.path.normpath(save_graph_dir)) model_name = os.path.basename(os.path.normpath(save_graph_dir))
if save_graph_name is '': if save_graph_name == '':
save_graph_name = model_name save_graph_name = model_name
graph.draw(save_graph_dir, save_graph_name, graph.all_op_nodes()) graph.draw(save_graph_dir, save_graph_name, graph.all_op_nodes())
print( print(
......
...@@ -11,18 +11,27 @@ ...@@ -11,18 +11,27 @@
# without warranties or conditions of any kind, either express or implied. # without warranties or conditions of any kind, either express or implied.
# see the license for the specific language governing permissions and # see the license for the specific language governing permissions and
# limitations under the license. # limitations under the license.
import numpy as np
import logging import logging
import paddle import numpy as np
import paddle.fluid as fluid
from paddle.fluid import core
from paddle.nn import Sequential
from paddle.nn import ReLU, ReLU6, LeakyReLU, Sigmoid, Softmax, PReLU
from paddle.nn import Linear, Conv2D, Softmax, BatchNorm2D, MaxPool2D
from paddle.nn import BatchNorm1D
from paddle.fluid.log_helper import get_logger import paddle
from paddle.framework import ParamAttr
from paddle.nn import (
BatchNorm1D,
BatchNorm2D,
Conv2D,
LeakyReLU,
Linear,
MaxPool2D,
PReLU,
ReLU,
ReLU6,
Sequential,
Sigmoid,
Softmax,
)
from paddle.static.log_helper import get_logger
_logger = get_logger( _logger = get_logger(
__name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s' __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s'
...@@ -86,18 +95,18 @@ def train_lenet(lenet, reader, optimizer): ...@@ -86,18 +95,18 @@ def train_lenet(lenet, reader, optimizer):
return loss_list return loss_list
class ImperativeLenet(fluid.dygraph.Layer): class ImperativeLenet(paddle.nn.Layer):
def __init__(self, num_classes=10): def __init__(self, num_classes=10):
super().__init__() super().__init__()
conv2d_w1_attr = fluid.ParamAttr(name="conv2d_w_1") conv2d_w1_attr = ParamAttr(name="conv2d_w_1")
conv2d_w2_attr = fluid.ParamAttr(name="conv2d_w_2") conv2d_w2_attr = ParamAttr(name="conv2d_w_2")
fc_w1_attr = fluid.ParamAttr(name="fc_w_1") fc_w1_attr = ParamAttr(name="fc_w_1")
fc_w2_attr = fluid.ParamAttr(name="fc_w_2") fc_w2_attr = ParamAttr(name="fc_w_2")
fc_w3_attr = fluid.ParamAttr(name="fc_w_3") fc_w3_attr = ParamAttr(name="fc_w_3")
conv2d_b2_attr = fluid.ParamAttr(name="conv2d_b_2") conv2d_b2_attr = ParamAttr(name="conv2d_b_2")
fc_b1_attr = fluid.ParamAttr(name="fc_b_1") fc_b1_attr = ParamAttr(name="fc_b_1")
fc_b2_attr = fluid.ParamAttr(name="fc_b_2") fc_b2_attr = ParamAttr(name="fc_b_2")
fc_b3_attr = fluid.ParamAttr(name="fc_b_3") fc_b3_attr = ParamAttr(name="fc_b_3")
self.features = Sequential( self.features = Sequential(
Conv2D( Conv2D(
in_channels=1, in_channels=1,
...@@ -155,26 +164,26 @@ class ImperativeLenet(fluid.dygraph.Layer): ...@@ -155,26 +164,26 @@ class ImperativeLenet(fluid.dygraph.Layer):
x = self.quant_stub(inputs) x = self.quant_stub(inputs)
x = self.features(x) x = self.features(x)
x = paddle.flatten(x, 1, -1) x = paddle.flatten(x, 1)
x = self.add(x, paddle.to_tensor(0.0)) # For CI x = self.add(x, paddle.to_tensor(0.0)) # For CI
x = self.fc(x) x = self.fc(x)
return x return x
class ImperativeLenetWithSkipQuant(fluid.dygraph.Layer): class ImperativeLenetWithSkipQuant(paddle.nn.Layer):
def __init__(self, num_classes=10): def __init__(self, num_classes=10):
super().__init__() super().__init__()
conv2d_w1_attr = fluid.ParamAttr(name="conv2d_w_1") conv2d_w1_attr = ParamAttr(name="conv2d_w_1")
conv2d_w2_attr = fluid.ParamAttr(name="conv2d_w_2") conv2d_w2_attr = ParamAttr(name="conv2d_w_2")
fc_w1_attr = fluid.ParamAttr(name="fc_w_1") fc_w1_attr = ParamAttr(name="fc_w_1")
fc_w2_attr = fluid.ParamAttr(name="fc_w_2") fc_w2_attr = ParamAttr(name="fc_w_2")
fc_w3_attr = fluid.ParamAttr(name="fc_w_3") fc_w3_attr = ParamAttr(name="fc_w_3")
conv2d_b1_attr = fluid.ParamAttr(name="conv2d_b_1") conv2d_b1_attr = ParamAttr(name="conv2d_b_1")
conv2d_b2_attr = fluid.ParamAttr(name="conv2d_b_2") conv2d_b2_attr = ParamAttr(name="conv2d_b_2")
fc_b1_attr = fluid.ParamAttr(name="fc_b_1") fc_b1_attr = ParamAttr(name="fc_b_1")
fc_b2_attr = fluid.ParamAttr(name="fc_b_2") fc_b2_attr = ParamAttr(name="fc_b_2")
fc_b3_attr = fluid.ParamAttr(name="fc_b_3") fc_b3_attr = ParamAttr(name="fc_b_3")
self.conv2d_0 = Conv2D( self.conv2d_0 = Conv2D(
in_channels=1, in_channels=1,
out_channels=6, out_channels=6,
...@@ -240,8 +249,7 @@ class ImperativeLenetWithSkipQuant(fluid.dygraph.Layer): ...@@ -240,8 +249,7 @@ class ImperativeLenetWithSkipQuant(fluid.dygraph.Layer):
x = self.relu6_0(x) x = self.relu6_0(x)
x = self.pool2d_1(x) x = self.pool2d_1(x)
x = paddle.flatten(x, 1, -1) x = paddle.flatten(x, 1)
x = self.linear_0(x) x = self.linear_0(x)
x = self.leaky_relu_0(x) x = self.leaky_relu_0(x)
x = self.linear_1(x) x = self.linear_1(x)
...@@ -252,7 +260,7 @@ class ImperativeLenetWithSkipQuant(fluid.dygraph.Layer): ...@@ -252,7 +260,7 @@ class ImperativeLenetWithSkipQuant(fluid.dygraph.Layer):
return x return x
class ImperativeLinearBn(fluid.dygraph.Layer): class ImperativeLinearBn(paddle.nn.Layer):
def __init__(self): def __init__(self):
super().__init__() super().__init__()
...@@ -284,7 +292,7 @@ class ImperativeLinearBn(fluid.dygraph.Layer): ...@@ -284,7 +292,7 @@ class ImperativeLinearBn(fluid.dygraph.Layer):
return x return x
class ImperativeLinearBn_hook(fluid.dygraph.Layer): class ImperativeLinearBn_hook(paddle.nn.Layer):
def __init__(self): def __init__(self):
super().__init__() super().__init__()
......
...@@ -12,19 +12,20 @@ ...@@ -12,19 +12,20 @@
# see the license for the specific language governing permissions and # see the license for the specific language governing permissions and
# limitations under the license. # limitations under the license.
import unittest
import os
import sys
import argparse import argparse
import logging import logging
import os
import struct import struct
import numpy as np import sys
import time import time
import unittest
import numpy as np
import paddle import paddle
import paddle.fluid as fluid
from paddle.fluid.framework import IrGraph from paddle.fluid.framework import IrGraph
from paddle.fluid.contrib.slim.quantization import Quant2Int8MkldnnPass from paddle.framework import core
from paddle.fluid import core from paddle.static.quantization import Quant2Int8MkldnnPass
paddle.enable_static() paddle.enable_static()
...@@ -185,23 +186,26 @@ class Quant2Int8ImageClassificationComparisonTest(unittest.TestCase): ...@@ -185,23 +186,26 @@ class Quant2Int8ImageClassificationComparisonTest(unittest.TestCase):
target='quant', target='quant',
): ):
assert target in ['quant', 'int8', 'fp32'] assert target in ['quant', 'int8', 'fp32']
place = fluid.CPUPlace() place = paddle.CPUPlace()
exe = fluid.Executor(place) exe = paddle.static.Executor(place)
inference_scope = fluid.executor.global_scope() inference_scope = paddle.static.global_scope()
with fluid.scope_guard(inference_scope): with paddle.static.scope_guard(inference_scope):
if os.path.exists(os.path.join(model_path, '__model__')): if os.path.exists(os.path.join(model_path, '__model__')):
[ [
inference_program, inference_program,
feed_target_names, feed_target_names,
fetch_targets, fetch_targets,
] = fluid.io.load_inference_model(model_path, exe) ] = paddle.fluid.io.load_inference_model(model_path, exe)
else: else:
[ [
inference_program, inference_program,
feed_target_names, feed_target_names,
fetch_targets, fetch_targets,
] = fluid.io.load_inference_model( ] = paddle.static.load_inference_model(
model_path, exe, 'model', 'params' model_path,
exe,
model_filename='model',
params_filename='params',
) )
graph = IrGraph(core.Graph(inference_program.desc), for_test=True) graph = IrGraph(core.Graph(inference_program.desc), for_test=True)
...@@ -359,7 +363,7 @@ class Quant2Int8ImageClassificationComparisonTest(unittest.TestCase): ...@@ -359,7 +363,7 @@ class Quant2Int8ImageClassificationComparisonTest(unittest.TestCase):
return set(map(int, string.split(','))) return set(map(int, string.split(',')))
def test_graph_transformation(self): def test_graph_transformation(self):
if not fluid.core.is_compiled_with_mkldnn(): if not core.is_compiled_with_mkldnn():
return return
quant_model_path = test_case_args.quant_model quant_model_path = test_case_args.quant_model
......
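As an aside for readers following the migration: the model-loading pattern that replaces fluid.io in this test (and in the NLP and INT8 comparison tests further down) can be sketched in isolation roughly as below. The 'model'/'params' filenames and the load_model_as_graph helper name are placeholders taken from the test's own assumptions, not part of the suite.

import os

import paddle
from paddle.fluid.framework import IrGraph
from paddle.framework import core

paddle.enable_static()


def load_model_as_graph(model_path):
    # Placeholder helper mirroring the rewritten test: load an inference model
    # with the paddle.static API and wrap its Program in an IrGraph so the
    # MKL-DNN quantization passes can run on it.
    place = paddle.CPUPlace()
    exe = paddle.static.Executor(place)
    scope = paddle.static.global_scope()
    with paddle.static.scope_guard(scope):
        if os.path.exists(os.path.join(model_path, '__model__')):
            # Old-format model directory: keep using the legacy fluid loader.
            [program, feed_names, fetch_targets] = paddle.fluid.io.load_inference_model(
                model_path, exe
            )
        else:
            [program, feed_names, fetch_targets] = paddle.static.load_inference_model(
                model_path,
                exe,
                model_filename='model',
                params_filename='params',
            )
    graph = IrGraph(core.Graph(program.desc), for_test=True)
    return graph, feed_names, fetch_targets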
...@@ -13,15 +13,17 @@ ...@@ -13,15 +13,17 @@
# limitations under the License. # limitations under the License.
import argparse import argparse
import numpy as np
import struct import struct
import sys import sys
import time import time
import unittest import unittest
from paddle import fluid
from paddle.fluid.core import AnalysisConfig, create_paddle_predictor import numpy as np
from save_quant_model import transform_and_save_int8_model from save_quant_model import transform_and_save_int8_model
import paddle
from paddle.framework import core
def parse_args(): def parse_args():
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
...@@ -80,17 +82,19 @@ class TestLstmModelPTQ(unittest.TestCase): ...@@ -80,17 +82,19 @@ class TestLstmModelPTQ(unittest.TestCase):
[len(feat) // 4 // 8, 8] [len(feat) // 4 // 8, 8]
) )
lod_feat = [feat.shape[0]] lod_feat = [feat.shape[0]]
minputs = fluid.create_lod_tensor(feat, [lod_feat], place) minputs = paddle.fluid.create_lod_tensor(
feat, [lod_feat], place
)
infer_data = fluid.core.PaddleTensor() infer_data = core.PaddleTensor()
infer_data.lod = minputs.lod() infer_data.lod = minputs.lod()
infer_data.data = fluid.core.PaddleBuf(np.array(minputs)) infer_data.data = core.PaddleBuf(np.array(minputs))
infer_data.shape = minputs.shape() infer_data.shape = minputs.shape()
infer_data.dtype = fluid.core.PaddleDType.FLOAT32 infer_data.dtype = core.PaddleDType.FLOAT32
infer_label = fluid.core.PaddleTensor() infer_label = core.PaddleTensor()
infer_label.data = fluid.core.PaddleBuf(np.array(label)) infer_label.data = core.PaddleBuf(np.array(label))
infer_label.shape = label.shape infer_label.shape = label.shape
infer_label.dtype = fluid.core.PaddleDType.INT32 infer_label.dtype = core.PaddleDType.INT32
data.append([infer_data, infer_label]) data.append([infer_data, infer_label])
warmup_data = data[:1] warmup_data = data[:1]
inputs = data[1:] inputs = data[1:]
...@@ -105,7 +109,7 @@ class TestLstmModelPTQ(unittest.TestCase): ...@@ -105,7 +109,7 @@ class TestLstmModelPTQ(unittest.TestCase):
use_analysis=False, use_analysis=False,
enable_ptq=False, enable_ptq=False,
): ):
config = AnalysisConfig(model_path) config = core.AnalysisConfig(model_path)
config.set_cpu_math_library_num_threads(num_threads) config.set_cpu_math_library_num_threads(num_threads)
if use_analysis: if use_analysis:
config.disable_gpu() config.disable_gpu()
...@@ -132,7 +136,7 @@ class TestLstmModelPTQ(unittest.TestCase): ...@@ -132,7 +136,7 @@ class TestLstmModelPTQ(unittest.TestCase):
use_analysis=False, use_analysis=False,
enable_ptq=False, enable_ptq=False,
): ):
place = fluid.CPUPlace() place = paddle.CPUPlace()
warmup_data, inputs = self.get_warmup_tensor(data_path, place) warmup_data, inputs = self.get_warmup_tensor(data_path, place)
warmup_data = [item[0] for item in warmup_data] warmup_data = [item[0] for item in warmup_data]
config = self.set_config( config = self.set_config(
...@@ -144,7 +148,7 @@ class TestLstmModelPTQ(unittest.TestCase): ...@@ -144,7 +148,7 @@ class TestLstmModelPTQ(unittest.TestCase):
enable_ptq, enable_ptq,
) )
predictor = create_paddle_predictor(config) predictor = core.create_paddle_predictor(config)
data = [item[0] for item in inputs] data = [item[0] for item in inputs]
label = np.array([item[1] for item in inputs]) label = np.array([item[1] for item in inputs])
...@@ -197,7 +201,7 @@ class TestLstmModelPTQ(unittest.TestCase): ...@@ -197,7 +201,7 @@ class TestLstmModelPTQ(unittest.TestCase):
return hx_acc, ctc_acc, fps return hx_acc, ctc_acc, fps
def test_lstm_model(self): def test_lstm_model(self):
if not fluid.core.is_compiled_with_mkldnn(): if not core.is_compiled_with_mkldnn():
return return
fp32_model = test_case_args.fp32_model fp32_model = test_case_args.fp32_model
......
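The predictor setup in the LSTM test above now draws everything from paddle.framework.core instead of fluid.core; a minimal sketch of that pattern, using only the AnalysisConfig/PaddleTensor calls that appear in the test (make_predictor and to_paddle_tensor are illustrative names only):

import paddle
from paddle.framework import core


def make_predictor(model_path, num_threads=1):
    # core.AnalysisConfig / core.create_paddle_predictor replace the old
    # fluid.core entry points used before the rewrite.
    config = core.AnalysisConfig(model_path)
    config.set_cpu_math_library_num_threads(num_threads)
    config.disable_gpu()
    return core.create_paddle_predictor(config)


def to_paddle_tensor(array):
    # Wrap a float32 numpy array in the PaddleTensor structure, as the test
    # does when it builds its warm-up batch.
    tensor = core.PaddleTensor()
    tensor.data = core.PaddleBuf(array)
    tensor.shape = list(array.shape)
    tensor.dtype = core.PaddleDType.FLOAT32
    return tensor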
...@@ -12,18 +12,19 @@ ...@@ -12,18 +12,19 @@
# see the license for the specific language governing permissions and # see the license for the specific language governing permissions and
# limitations under the license. # limitations under the license.
import unittest
import os
import sys
import argparse import argparse
import logging import logging
import numpy as np import os
import sys
import time import time
import unittest
import numpy as np
import paddle import paddle
import paddle.fluid as fluid
from paddle.fluid.framework import IrGraph from paddle.fluid.framework import IrGraph
from paddle.fluid.contrib.slim.quantization import Quant2Int8MkldnnPass from paddle.framework import core
from paddle.fluid import core from paddle.static.quantization import Quant2Int8MkldnnPass
paddle.enable_static() paddle.enable_static()
...@@ -158,23 +159,26 @@ class QuantInt8NLPComparisonTest(unittest.TestCase): ...@@ -158,23 +159,26 @@ class QuantInt8NLPComparisonTest(unittest.TestCase):
target='quant', target='quant',
): ):
assert target in ['quant', 'int8', 'fp32'] assert target in ['quant', 'int8', 'fp32']
place = fluid.CPUPlace() place = paddle.CPUPlace()
exe = fluid.Executor(place) exe = paddle.static.Executor(place)
inference_scope = fluid.executor.global_scope() inference_scope = paddle.static.global_scope()
with fluid.scope_guard(inference_scope): with paddle.static.scope_guard(inference_scope):
if os.path.exists(os.path.join(model_path, '__model__')): if os.path.exists(os.path.join(model_path, '__model__')):
[ [
inference_program, inference_program,
feed_target_names, feed_target_names,
fetch_targets, fetch_targets,
] = fluid.io.load_inference_model(model_path, exe) ] = paddle.fluid.io.load_inference_model(model_path, exe)
else: else:
[ [
inference_program, inference_program,
feed_target_names, feed_target_names,
fetch_targets, fetch_targets,
] = fluid.io.load_inference_model( ] = paddle.static.load_inference_model(
model_path, exe, 'model', 'params' model_path,
exe,
model_filename='model',
params_filename='params',
) )
graph = IrGraph(core.Graph(inference_program.desc), for_test=True) graph = IrGraph(core.Graph(inference_program.desc), for_test=True)
...@@ -296,7 +300,7 @@ class QuantInt8NLPComparisonTest(unittest.TestCase): ...@@ -296,7 +300,7 @@ class QuantInt8NLPComparisonTest(unittest.TestCase):
return set(map(int, string.split(','))) return set(map(int, string.split(',')))
def test_graph_transformation(self): def test_graph_transformation(self):
if not fluid.core.is_compiled_with_mkldnn(): if not core.is_compiled_with_mkldnn():
return return
quant_model_path = test_case_args.quant_model quant_model_path = test_case_args.quant_model
......
...@@ -12,19 +12,20 @@ ...@@ -12,19 +12,20 @@
# see the license for the specific language governing permissions and # see the license for the specific language governing permissions and
# limitations under the license. # limitations under the license.
import unittest
import os
import sys
import argparse import argparse
import logging import logging
import os
import struct import struct
import numpy as np import sys
import time import time
import unittest
import numpy as np
import paddle import paddle
import paddle.fluid as fluid
from paddle.fluid.framework import IrGraph from paddle.fluid.framework import IrGraph
from paddle.fluid.contrib.slim.quantization import QuantInt8MkldnnPass from paddle.framework import core
from paddle.fluid import core from paddle.static.quantization import QuantInt8MkldnnPass
paddle.enable_static() paddle.enable_static()
...@@ -163,23 +164,26 @@ class QuantInt8ImageClassificationComparisonTest(unittest.TestCase): ...@@ -163,23 +164,26 @@ class QuantInt8ImageClassificationComparisonTest(unittest.TestCase):
skip_batch_num=0, skip_batch_num=0,
transform_to_int8=False, transform_to_int8=False,
): ):
place = fluid.CPUPlace() place = paddle.CPUPlace()
exe = fluid.Executor(place) exe = paddle.static.Executor(place)
inference_scope = fluid.executor.global_scope() inference_scope = paddle.static.global_scope()
with fluid.scope_guard(inference_scope): with paddle.static.scope_guard(inference_scope):
if os.path.exists(os.path.join(model_path, '__model__')): if os.path.exists(os.path.join(model_path, '__model__')):
[ [
inference_program, inference_program,
feed_target_names, feed_target_names,
fetch_targets, fetch_targets,
] = fluid.io.load_inference_model(model_path, exe) ] = paddle.fluid.io.load_inference_model(model_path, exe)
else: else:
[ [
inference_program, inference_program,
feed_target_names, feed_target_names,
fetch_targets, fetch_targets,
] = fluid.io.load_inference_model( ] = paddle.static.load_inference_model(
model_path, exe, 'model', 'params' model_path,
exe,
model_filename='model',
params_filename='params',
) )
graph = IrGraph(core.Graph(inference_program.desc), for_test=True) graph = IrGraph(core.Graph(inference_program.desc), for_test=True)
...@@ -298,7 +302,7 @@ class QuantInt8ImageClassificationComparisonTest(unittest.TestCase): ...@@ -298,7 +302,7 @@ class QuantInt8ImageClassificationComparisonTest(unittest.TestCase):
assert fp32_acc1 - int8_acc1 <= threshold assert fp32_acc1 - int8_acc1 <= threshold
def test_graph_transformation(self): def test_graph_transformation(self):
if not fluid.core.is_compiled_with_mkldnn(): if not core.is_compiled_with_mkldnn():
return return
quant_model_path = test_case_args.quant_model quant_model_path = test_case_args.quant_model
......
...@@ -12,15 +12,15 @@ ...@@ -12,15 +12,15 @@
# see the license for the specific language governing permissions and # see the license for the specific language governing permissions and
# limitations under the license. # limitations under the license.
import unittest import argparse
import os import os
import sys import sys
import argparse import unittest
import paddle import paddle
import paddle.fluid as fluid
from paddle.fluid.framework import IrGraph from paddle.fluid.framework import IrGraph
from paddle.fluid.contrib.slim.quantization import Quant2Int8MkldnnPass from paddle.framework import core
from paddle.fluid import core from paddle.static.quantization import Quant2Int8MkldnnPass
paddle.enable_static() paddle.enable_static()
...@@ -93,35 +93,41 @@ def transform_and_save_int8_model( ...@@ -93,35 +93,41 @@ def transform_and_save_int8_model(
debug=False, debug=False,
quant_model_filename='', quant_model_filename='',
quant_params_filename='', quant_params_filename='',
save_model_filename="__model__", save_model_filename="model",
save_params_filename=None, save_params_filename=None,
): ):
place = fluid.CPUPlace() place = paddle.CPUPlace()
exe = fluid.Executor(place) exe = paddle.static.Executor(place)
inference_scope = fluid.executor.global_scope() inference_scope = paddle.static.global_scope()
with fluid.scope_guard(inference_scope): with paddle.static.scope_guard(inference_scope):
if not quant_model_filename: if not quant_model_filename:
if os.path.exists(os.path.join(original_path, '__model__')): if os.path.exists(os.path.join(original_path, '__model__')):
[ [
inference_program, inference_program,
feed_target_names, feed_target_names,
fetch_targets, fetch_targets,
] = fluid.io.load_inference_model(original_path, exe) ] = paddle.fluid.io.load_inference_model(original_path, exe)
else: else:
[ [
inference_program, inference_program,
feed_target_names, feed_target_names,
fetch_targets, fetch_targets,
] = fluid.io.load_inference_model( ] = paddle.static.load_inference_model(
original_path, exe, 'model', 'params' original_path,
exe,
model_filename='model',
params_filename='params',
) )
else: else:
[ [
inference_program, inference_program,
feed_target_names, feed_target_names,
fetch_targets, fetch_targets,
] = fluid.io.load_inference_model( ] = paddle.static.load_inference_model(
original_path, exe, quant_model_filename, quant_params_filename original_path,
exe,
model_filename=quant_model_filename,
params_filename=quant_params_filename,
) )
ops_to_quantize_set = set() ops_to_quantize_set = set()
...@@ -147,15 +153,18 @@ def transform_and_save_int8_model( ...@@ -147,15 +153,18 @@ def transform_and_save_int8_model(
) )
graph = transform_to_mkldnn_int8_pass.apply(graph) graph = transform_to_mkldnn_int8_pass.apply(graph)
inference_program = graph.to_program() inference_program = graph.to_program()
with fluid.scope_guard(inference_scope): with paddle.static.scope_guard(inference_scope):
fluid.io.save_inference_model( path_prefix = os.path.join(save_path, save_model_filename)
save_path, feed_vars = [
feed_target_names, inference_program.global_block().var(name)
for name in feed_target_names
]
paddle.static.save_inference_model(
path_prefix,
feed_vars,
fetch_targets, fetch_targets,
exe, executor=exe,
inference_program, program=inference_program,
model_filename=save_model_filename,
params_filename=save_params_filename,
) )
print( print(
"Success! INT8 model obtained from the Quant model can be found at {}\n".format( "Success! INT8 model obtained from the Quant model can be found at {}\n".format(
......
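The save path of the script above changes shape as well: paddle.static.save_inference_model expects a path prefix plus feed/fetch Variables instead of a directory plus name lists. A condensed sketch of just that step (save_int8_model is an illustrative wrapper, not part of the script):

import os

import paddle


def save_int8_model(save_path, save_model_filename, program,
                    feed_target_names, fetch_targets, exe):
    # Resolve the feed names recorded at load time back into Variables of the
    # transformed program, then save with the 2.x static API.
    path_prefix = os.path.join(save_path, save_model_filename)
    feed_vars = [
        program.global_block().var(name) for name in feed_target_names
    ]
    paddle.static.save_inference_model(
        path_prefix,
        feed_vars,
        fetch_targets,
        executor=exe,
        program=program,
    )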
...@@ -13,12 +13,13 @@ ...@@ -13,12 +13,13 @@
# limitations under the license. # limitations under the license.
import os import os
import numpy as np
import unittest import unittest
import numpy as np
import paddle import paddle
import paddle.fluid as fluid
from paddle.fluid.framework import IrGraph from paddle.fluid.framework import IrGraph
from paddle.fluid import core from paddle.framework import core
paddle.enable_static() paddle.enable_static()
...@@ -27,63 +28,68 @@ os.environ["CPU_NUM"] = "1" ...@@ -27,63 +28,68 @@ os.environ["CPU_NUM"] = "1"
def conv_block(): def conv_block():
img = fluid.layers.data(name='image', shape=[1, 28, 28], dtype='float32') img = paddle.static.data(
label = fluid.layers.data(name='label', shape=[1], dtype='int64') name='image', shape=[-1, 1, 28, 28], dtype='float32'
conv_pool_1 = fluid.nets.simple_img_conv_pool( )
label = paddle.static.data(name='label', shape=[-1, 1], dtype='int64')
conv_out_1 = paddle.static.nn.conv2d(
input=img, input=img,
filter_size=5, filter_size=5,
num_filters=20, num_filters=20,
pool_size=2, act='relu',
pool_stride=2, )
act="relu", conv_pool_1 = paddle.nn.functional.max_pool2d(
conv_out_1, kernel_size=2, stride=2
) )
conv_pool_1 = paddle.static.nn.batch_norm(conv_pool_1) conv_pool_1 = paddle.static.nn.batch_norm(conv_pool_1)
conv_pool_2 = fluid.nets.simple_img_conv_pool(
conv_out_2 = paddle.static.nn.conv2d(
input=conv_pool_1, input=conv_pool_1,
filter_size=5, filter_size=5,
num_filters=50, num_filters=20,
pool_size=2, act='relu',
pool_stride=2, )
act="relu", conv_pool_2 = paddle.nn.functional.max_pool2d(
conv_out_2, kernel_size=2, stride=2
) )
prediction = fluid.layers.fc(input=conv_pool_2, size=10, act='softmax') prediction = paddle.static.nn.fc(
loss = paddle.nn.functional.cross_entropy( x=conv_pool_2, size=10, activation='softmax'
input=prediction, label=label, reduction='none', use_softmax=False
) )
loss = paddle.nn.functional.cross_entropy(input=prediction, label=label)
avg_loss = paddle.mean(loss) avg_loss = paddle.mean(loss)
return [img, label], avg_loss return [img, label], avg_loss
class TestGraph(unittest.TestCase): class TestGraph(unittest.TestCase):
def graph_apis(self, use_cuda=False, for_ci=True): def graph_apis(self, use_cuda=False, for_ci=True):
main = fluid.Program() main = paddle.static.Program()
startup = fluid.Program() startup = paddle.static.Program()
with fluid.unique_name.guard(): with paddle.utils.unique_name.guard():
with fluid.program_guard(main, startup): with paddle.static.program_guard(main, startup):
feeds, loss = conv_block() feeds, loss = conv_block()
opt = fluid.optimizer.Adam(learning_rate=0.001) opt = paddle.optimizer.Adam(learning_rate=0.001)
opt.minimize(loss) opt.minimize(loss)
graph = IrGraph(core.Graph(main.desc), for_test=False) graph = IrGraph(core.Graph(main.desc), for_test=False)
backup_graph = graph.clone() backup_graph = graph.clone()
self.assertEqual(len(graph.all_nodes()), len(backup_graph.all_nodes())) self.assertEqual(len(graph.all_nodes()), len(backup_graph.all_nodes()))
build_strategy = fluid.BuildStrategy() build_strategy = paddle.static.BuildStrategy()
build_strategy.memory_optimize = False build_strategy.memory_optimize = False
build_strategy.enable_inplace = False build_strategy.enable_inplace = False
origin_binary = fluid.CompiledProgram(graph.graph).with_data_parallel( origin_binary = paddle.static.CompiledProgram(
loss_name=loss.name, build_strategy=build_strategy graph.graph
) ).with_data_parallel(loss_name=loss.name, build_strategy=build_strategy)
backup_binary = fluid.CompiledProgram( backup_binary = paddle.static.CompiledProgram(
backup_graph.graph backup_graph.graph
).with_data_parallel(loss_name=loss.name, build_strategy=build_strategy) ).with_data_parallel(loss_name=loss.name, build_strategy=build_strategy)
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace()
exe = fluid.Executor(place) exe = paddle.static.Executor(place)
exe.run(startup) exe.run(startup)
iters = 5 iters = 5
batch_size = 8 batch_size = 8
train_reader = paddle.batch( train_reader = paddle.batch(
paddle.dataset.mnist.train(), batch_size=batch_size paddle.dataset.mnist.train(), batch_size=batch_size
) )
feeder = fluid.DataFeeder(feed_list=feeds, place=place) feeder = paddle.fluid.DataFeeder(feed_list=feeds, place=place)
def _train(binary): def _train(binary):
for _ in range(iters): for _ in range(iters):
...@@ -105,17 +111,29 @@ class TestGraph(unittest.TestCase): ...@@ -105,17 +111,29 @@ class TestGraph(unittest.TestCase):
var.set(var_array, place) var.set(var_array, place)
sum_before = np.sum( sum_before = np.sum(
np.array(fluid.global_scope().find_var('conv2d_1.w_0').get_tensor()) np.array(
paddle.static.global_scope()
.find_var('conv2d_1.w_0')
.get_tensor()
) )
fluid.io._save_persistable_nodes(exe, checkponit_dir, graph) )
_set_zero('conv2d_1.w_0', fluid.global_scope(), place) paddle.fluid.io._save_persistable_nodes(exe, checkponit_dir, graph)
_set_zero('conv2d_1.w_0', paddle.static.global_scope(), place)
set_after = np.sum( set_after = np.sum(
np.array(fluid.global_scope().find_var('conv2d_1.w_0').get_tensor()) np.array(
paddle.static.global_scope()
.find_var('conv2d_1.w_0')
.get_tensor()
)
) )
self.assertEqual(set_after, 0) self.assertEqual(set_after, 0)
fluid.io._load_persistable_nodes(exe, checkponit_dir, graph) paddle.fluid.io._load_persistable_nodes(exe, checkponit_dir, graph)
sum_after = np.sum( sum_after = np.sum(
np.array(fluid.global_scope().find_var('conv2d_1.w_0').get_tensor()) np.array(
paddle.static.global_scope()
.find_var('conv2d_1.w_0')
.get_tensor()
)
) )
self.assertEqual(sum_before, sum_after) self.assertEqual(sum_before, sum_after)
...@@ -144,7 +162,7 @@ class TestGraph(unittest.TestCase): ...@@ -144,7 +162,7 @@ class TestGraph(unittest.TestCase):
self.graph_apis(use_cuda=False, for_ci=True) self.graph_apis(use_cuda=False, for_ci=True)
def test_graph_apis_cuda(self): def test_graph_apis_cuda(self):
if fluid.core.is_compiled_with_cuda(): if core.is_compiled_with_cuda():
self.graph_apis(use_cuda=True, for_ci=True) self.graph_apis(use_cuda=True, for_ci=True)
......
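For orientation, the graph round trip exercised by this test can be reduced to a few lines under the new API; the tiny network below is an arbitrary stand-in for conv_block, and the assertion is the same clone check the test makes.

import paddle
from paddle.fluid.framework import IrGraph
from paddle.framework import core

paddle.enable_static()


def build_and_clone_graph():
    # Build a trivial static program, wrap it in an IrGraph, and clone it.
    main = paddle.static.Program()
    startup = paddle.static.Program()
    with paddle.utils.unique_name.guard():
        with paddle.static.program_guard(main, startup):
            img = paddle.static.data(
                name='image', shape=[-1, 1, 28, 28], dtype='float32'
            )
            hidden = paddle.static.nn.fc(x=img, size=10)
            loss = paddle.mean(hidden)
            paddle.optimizer.Adam(learning_rate=0.001).minimize(loss)
    graph = IrGraph(core.Graph(main.desc), for_test=False)
    backup_graph = graph.clone()
    assert len(graph.all_nodes()) == len(backup_graph.all_nodes())
    return graph, backup_graph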
...@@ -13,38 +13,31 @@ ...@@ -13,38 +13,31 @@
# limitations under the license. # limitations under the license.
import os import os
import numpy as np
import random
import unittest
import logging
import warnings
import tempfile import tempfile
import unittest
import numpy as np
from imperative_test_utils import fix_model_dict, train_lenet
import paddle import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
import paddle.fluid.layers as layers from paddle.framework import core, set_flags
from paddle.fluid import core from paddle.nn import (
from paddle.fluid.optimizer import AdamOptimizer BatchNorm2D,
from paddle.fluid.framework import IrGraph Conv2D,
from paddle.fluid.contrib.slim.quantization import ImperativeQuantAware Linear,
from paddle.nn import Sequential MaxPool2D,
from paddle.jit.translated_layer import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX Sequential,
from paddle.nn.layer import ReLU, LeakyReLU, Sigmoid, Softmax, PReLU Softmax,
from paddle.nn import Linear, Conv2D, Softmax, BatchNorm2D, MaxPool2D )
from paddle.fluid.log_helper import get_logger from paddle.nn.layer import LeakyReLU, PReLU, ReLU, Sigmoid
from paddle.fluid.dygraph import nn from paddle.quantization import ImperativeQuantAware
from imperative_test_utils import fix_model_dict, train_lenet
paddle.enable_static() paddle.enable_static()
os.environ["CPU_NUM"] = "1" os.environ["CPU_NUM"] = "1"
if core.is_compiled_with_cuda(): if core.is_compiled_with_cuda():
fluid.set_flags({"FLAGS_cudnn_deterministic": True}) set_flags({"FLAGS_cudnn_deterministic": True})
_logger = get_logger(
__name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s'
)
def get_vaild_warning_num(warning, w): def get_vaild_warning_num(warning, w):
...@@ -55,18 +48,18 @@ def get_vaild_warning_num(warning, w): ...@@ -55,18 +48,18 @@ def get_vaild_warning_num(warning, w):
return num return num
class ImperativeLenet(fluid.dygraph.Layer): class ImperativeLenet(paddle.nn.Layer):
def __init__(self, num_classes=10): def __init__(self, num_classes=10):
super().__init__() super().__init__()
conv2d_w1_attr = fluid.ParamAttr(name="conv2d_w_1") conv2d_w1_attr = paddle.ParamAttr(name="conv2d_w_1")
conv2d_w2_attr = fluid.ParamAttr(name="conv2d_w_2") conv2d_w2_attr = paddle.ParamAttr(name="conv2d_w_2")
fc_w1_attr = fluid.ParamAttr(name="fc_w_1") fc_w1_attr = paddle.ParamAttr(name="fc_w_1")
fc_w2_attr = fluid.ParamAttr(name="fc_w_2") fc_w2_attr = paddle.ParamAttr(name="fc_w_2")
fc_w3_attr = fluid.ParamAttr(name="fc_w_3") fc_w3_attr = paddle.ParamAttr(name="fc_w_3")
conv2d_b2_attr = fluid.ParamAttr(name="conv2d_b_2") conv2d_b2_attr = paddle.ParamAttr(name="conv2d_b_2")
fc_b1_attr = fluid.ParamAttr(name="fc_b_1") fc_b1_attr = paddle.ParamAttr(name="fc_b_1")
fc_b2_attr = fluid.ParamAttr(name="fc_b_2") fc_b2_attr = paddle.ParamAttr(name="fc_b_2")
fc_b3_attr = fluid.ParamAttr(name="fc_b_3") fc_b3_attr = paddle.ParamAttr(name="fc_b_3")
self.features = Sequential( self.features = Sequential(
Conv2D( Conv2D(
in_channels=1, in_channels=1,
...@@ -121,7 +114,7 @@ class ImperativeLenet(fluid.dygraph.Layer): ...@@ -121,7 +114,7 @@ class ImperativeLenet(fluid.dygraph.Layer):
def forward(self, inputs): def forward(self, inputs):
x = self.features(inputs) x = self.features(inputs)
x = paddle.flatten(x, 1, -1) x = paddle.flatten(x, 1)
x = self.fc(x) x = self.fc(x)
return x return x
...@@ -152,8 +145,8 @@ class TestImperativeOutSclae(unittest.TestCase): ...@@ -152,8 +145,8 @@ class TestImperativeOutSclae(unittest.TestCase):
with fluid.dygraph.guard(): with fluid.dygraph.guard():
np.random.seed(seed) np.random.seed(seed)
fluid.default_main_program().random_seed = seed paddle.static.default_main_program().random_seed = seed
fluid.default_startup_program().random_seed = seed paddle.static.default_startup_program().random_seed = seed
lenet = ImperativeLenet() lenet = ImperativeLenet()
lenet = fix_model_dict(lenet) lenet = fix_model_dict(lenet)
...@@ -162,8 +155,8 @@ class TestImperativeOutSclae(unittest.TestCase): ...@@ -162,8 +155,8 @@ class TestImperativeOutSclae(unittest.TestCase):
reader = paddle.batch( reader = paddle.batch(
paddle.dataset.mnist.test(), batch_size=32, drop_last=True paddle.dataset.mnist.test(), batch_size=32, drop_last=True
) )
adam = AdamOptimizer( adam = paddle.optimizer.Adam(
learning_rate=lr, parameter_list=lenet.parameters() learning_rate=lr, parameters=lenet.parameters()
) )
loss_list = train_lenet(lenet, reader, adam) loss_list = train_lenet(lenet, reader, adam)
lenet.eval() lenet.eval()
...@@ -186,8 +179,8 @@ class TestImperativeOutSclae(unittest.TestCase): ...@@ -186,8 +179,8 @@ class TestImperativeOutSclae(unittest.TestCase):
reader = paddle.batch( reader = paddle.batch(
paddle.dataset.mnist.test(), batch_size=32, drop_last=True paddle.dataset.mnist.test(), batch_size=32, drop_last=True
) )
adam = AdamOptimizer( adam = paddle.optimizer.Adam(
learning_rate=lr, parameter_list=lenet.parameters() learning_rate=lr, parameters=lenet.parameters()
) )
loss_list = train_lenet(lenet, reader, adam) loss_list = train_lenet(lenet, reader, adam)
lenet.eval() lenet.eval()
......
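The training loop itself lives in the imported train_lenet helper, which this diff does not show; roughly, a single dygraph step under the rewritten API looks like the sketch below (train_one_batch is our name, the shapes follow the MNIST batches used above, and dynamic-graph mode is assumed).

import paddle


def train_one_batch(model, optimizer, x_data, y_data):
    # x_data: float32 ndarray [N, 1, 28, 28]; y_data: int64 ndarray [N, 1].
    # optimizer is built as paddle.optimizer.Adam(learning_rate=lr,
    # parameters=model.parameters()), matching the test above.
    img = paddle.to_tensor(x_data)            # replaces fluid.dygraph.to_variable
    label = paddle.to_tensor(y_data)
    out = model(img)
    acc = paddle.metric.accuracy(out, label)  # replaces paddle.static.accuracy
    loss = paddle.nn.functional.cross_entropy(out, label)
    avg_loss = paddle.mean(loss)
    avg_loss.backward()
    optimizer.step()                          # replaces optimizer.minimize(avg_loss)
    optimizer.clear_grad()
    return float(avg_loss), float(acc)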
# copyright (c) 2018 paddlepaddle authors. all rights reserved. # copyright (c) 2022 paddlepaddle authors. all rights reserved.
# #
# licensed under the apache license, version 2.0 (the "license"); # licensed under the apache license, version 2.0 (the "license");
# you may not use this file except in compliance with the license. # you may not use this file except in compliance with the license.
...@@ -12,29 +12,32 @@ ...@@ -12,29 +12,32 @@
# see the license for the specific language governing permissions and # see the license for the specific language governing permissions and
# limitations under the license. # limitations under the license.
import logging
import os import os
import numpy as np import tempfile
import random
import shutil
import time import time
import unittest import unittest
import copy
import logging
import tempfile
import paddle.nn as nn
import paddle
import paddle.fluid as fluid
from paddle.fluid.contrib.slim.quantization import *
from paddle.fluid.log_helper import get_logger
from paddle.dataset.common import download
import numpy as np
from imperative_test_utils import ( from imperative_test_utils import (
fix_model_dict,
ImperativeLenet, ImperativeLenet,
ImperativeLinearBn, ImperativeLinearBn,
ImperativeLinearBn_hook,
) )
from imperative_test_utils import ImperativeLinearBn_hook
import paddle
import paddle.nn as nn
from paddle.dataset.common import download
from paddle.fluid.framework import _test_eager_guard
from paddle.quantization import (
AbsmaxQuantizer,
HistQuantizer,
ImperativePTQ,
KLQuantizer,
PerChannelAbsmaxQuantizer,
PTQConfig,
)
from paddle.static.log_helper import get_logger
_logger = get_logger( _logger = get_logger(
__name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s' __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s'
...@@ -149,8 +152,8 @@ class TestImperativePTQ(unittest.TestCase): ...@@ -149,8 +152,8 @@ class TestImperativePTQ(unittest.TestCase):
label = paddle.to_tensor(y_data) label = paddle.to_tensor(y_data)
out = model(img) out = model(img)
acc_top1 = paddle.static.accuracy(input=out, label=label, k=1) acc_top1 = paddle.metric.accuracy(input=out, label=label, k=1)
acc_top5 = paddle.static.accuracy(input=out, label=label, k=5) acc_top5 = paddle.metric.accuracy(input=out, label=label, k=5)
eval_acc_top1_list.append(float(acc_top1.numpy())) eval_acc_top1_list.append(float(acc_top1.numpy()))
if batch_id % 50 == 0: if batch_id % 50 == 0:
...@@ -207,7 +210,7 @@ class TestImperativePTQ(unittest.TestCase): ...@@ -207,7 +210,7 @@ class TestImperativePTQ(unittest.TestCase):
break break
return top1_correct_num / total_num return top1_correct_num / total_num
def test_ptq(self): def func_ptq(self):
start_time = time.time() start_time = time.time()
self.set_vars() self.set_vars()
...@@ -265,9 +268,14 @@ class TestImperativePTQ(unittest.TestCase): ...@@ -265,9 +268,14 @@ class TestImperativePTQ(unittest.TestCase):
end_time = time.time() end_time = time.time()
print("total time: %ss \n" % (end_time - start_time)) print("total time: %ss \n" % (end_time - start_time))
def test_ptq(self):
with _test_eager_guard():
self.func_ptq()
self.func_ptq()
class TestImperativePTQfuse(TestImperativePTQ): class TestImperativePTQfuse(TestImperativePTQ):
def test_ptq(self): def func_ptq(self):
start_time = time.time() start_time = time.time()
self.set_vars() self.set_vars()
...@@ -336,6 +344,11 @@ class TestImperativePTQfuse(TestImperativePTQ): ...@@ -336,6 +344,11 @@ class TestImperativePTQfuse(TestImperativePTQ):
end_time = time.time() end_time = time.time()
print("total time: %ss \n" % (end_time - start_time)) print("total time: %ss \n" % (end_time - start_time))
def test_ptq(self):
with _test_eager_guard():
self.func_ptq()
self.func_ptq()
class TestImperativePTQHist(TestImperativePTQ): class TestImperativePTQHist(TestImperativePTQ):
def set_vars(self): def set_vars(self):
......
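A compressed view of the post-training-quantization flow these cases drive, assuming the PTQConfig signature (activation quantizer first, weight quantizer second) carried over unchanged from the old slim module; treat this as a sketch rather than the canonical API.

import paddle
from paddle.quantization import (
    ImperativePTQ,
    KLQuantizer,
    PerChannelAbsmaxQuantizer,
    PTQConfig,
)


def quantize_with_ptq(model):
    # Assumed argument order: PTQConfig(activation_quantizer, weight_quantizer).
    ptq = ImperativePTQ(PTQConfig(KLQuantizer(), PerChannelAbsmaxQuantizer()))
    # quantize() instruments the layers; calibration then happens by running
    # ordinary inference batches through the returned model.
    quant_model = ptq.quantize(model)
    return ptq, quant_model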
...@@ -12,34 +12,34 @@ ...@@ -12,34 +12,34 @@
# see the license for the specific language governing permissions and # see the license for the specific language governing permissions and
# limitations under the license. # limitations under the license.
import logging
import os import os
import numpy as np
import random
import time
import tempfile import tempfile
import unittest import unittest
import logging
import numpy as np
from imperative_test_utils import ImperativeLenet, fix_model_dict
import paddle import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid import core from paddle.framework import core, set_flags
from paddle.fluid.optimizer import AdamOptimizer from paddle.nn import Conv2D, Conv2DTranspose
from paddle.fluid.contrib.slim.quantization import ImperativeQuantAware
from paddle.nn import Sequential
from paddle.nn import Linear, Conv2D, Softmax, Conv2DTranspose
from paddle.fluid.log_helper import get_logger
from paddle.jit.translated_layer import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX
from paddle.nn.quant.quant_layers import ( from paddle.nn.quant.quant_layers import (
QuantizedConv2D, QuantizedConv2D,
QuantizedConv2DTranspose, QuantizedConv2DTranspose,
) )
from imperative_test_utils import fix_model_dict, ImperativeLenet from paddle.optimizer import Adam
from paddle.quantization import ImperativeQuantAware
from paddle.static.log_helper import get_logger
INFER_MODEL_SUFFIX = ".pdmodel"
INFER_PARAMS_SUFFIX = ".pdiparams"
paddle.enable_static() paddle.enable_static()
os.environ["CPU_NUM"] = "1" os.environ["CPU_NUM"] = "1"
if core.is_compiled_with_cuda(): if core.is_compiled_with_cuda():
fluid.set_flags({"FLAGS_cudnn_deterministic": True}) set_flags({"FLAGS_cudnn_deterministic": True})
_logger = get_logger( _logger = get_logger(
__name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s' __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s'
...@@ -84,7 +84,7 @@ class TestImperativeQat(unittest.TestCase): ...@@ -84,7 +84,7 @@ class TestImperativeQat(unittest.TestCase):
) )
quant_conv1 = QuantizedConv2D(conv1) quant_conv1 = QuantizedConv2D(conv1)
data = np.random.uniform(-1, 1, [10, 3, 32, 32]).astype('float32') data = np.random.uniform(-1, 1, [10, 3, 32, 32]).astype('float32')
quant_conv1(fluid.dygraph.to_variable(data)) quant_conv1(paddle.to_tensor(data))
conv_transpose = Conv2DTranspose(4, 6, (3, 3)) conv_transpose = Conv2DTranspose(4, 6, (3, 3))
quant_conv_transpose = QuantizedConv2DTranspose(conv_transpose) quant_conv_transpose = QuantizedConv2DTranspose(conv_transpose)
...@@ -95,15 +95,13 @@ class TestImperativeQat(unittest.TestCase): ...@@ -95,15 +95,13 @@ class TestImperativeQat(unittest.TestCase):
seed = 1 seed = 1
np.random.seed(seed) np.random.seed(seed)
fluid.default_main_program().random_seed = seed paddle.static.default_main_program().random_seed = seed
fluid.default_startup_program().random_seed = seed paddle.static.default_startup_program().random_seed = seed
lenet = ImperativeLenet() lenet = ImperativeLenet()
lenet = fix_model_dict(lenet) lenet = fix_model_dict(lenet)
imperative_qat.quantize(lenet) imperative_qat.quantize(lenet)
adam = AdamOptimizer( adam = Adam(learning_rate=0.001, parameters=lenet.parameters())
learning_rate=0.001, parameter_list=lenet.parameters()
)
train_reader = paddle.batch( train_reader = paddle.batch(
paddle.dataset.mnist.train(), batch_size=32, drop_last=True paddle.dataset.mnist.train(), batch_size=32, drop_last=True
...@@ -125,10 +123,10 @@ class TestImperativeQat(unittest.TestCase): ...@@ -125,10 +123,10 @@ class TestImperativeQat(unittest.TestCase):
.reshape(-1, 1) .reshape(-1, 1)
) )
img = fluid.dygraph.to_variable(x_data) img = paddle.to_tensor(x_data)
label = fluid.dygraph.to_variable(y_data) label = paddle.to_tensor(y_data)
out = lenet(img) out = lenet(img)
acc = paddle.static.accuracy(out, label) acc = paddle.metric.accuracy(out, label)
loss = paddle.nn.functional.cross_entropy( loss = paddle.nn.functional.cross_entropy(
out, label, reduction='none', use_softmax=False out, label, reduction='none', use_softmax=False
) )
...@@ -157,14 +155,14 @@ class TestImperativeQat(unittest.TestCase): ...@@ -157,14 +155,14 @@ class TestImperativeQat(unittest.TestCase):
.reshape(-1, 1) .reshape(-1, 1)
) )
img = fluid.dygraph.to_variable(x_data) img = paddle.to_tensor(x_data)
label = fluid.dygraph.to_variable(y_data) label = paddle.to_tensor(y_data)
out = lenet(img) out = lenet(img)
acc_top1 = paddle.static.accuracy( acc_top1 = paddle.metric.accuracy(
input=out, label=label, k=1 input=out, label=label, k=1
) )
acc_top5 = paddle.static.accuracy( acc_top5 = paddle.metric.accuracy(
input=out, label=label, k=5 input=out, label=label, k=5
) )
...@@ -197,11 +195,11 @@ class TestImperativeQat(unittest.TestCase): ...@@ -197,11 +195,11 @@ class TestImperativeQat(unittest.TestCase):
y_data = ( y_data = (
np.array([x[1] for x in data]).astype('int64').reshape(-1, 1) np.array([x[1] for x in data]).astype('int64').reshape(-1, 1)
) )
test_img = fluid.dygraph.to_variable(test_data) test_img = paddle.to_tensor(test_data)
label = fluid.dygraph.to_variable(y_data) label = paddle.to_tensor(y_data)
lenet.eval() lenet.eval()
fp32_out = lenet(test_img) fp32_out = lenet(test_img)
fp32_acc = paddle.static.accuracy(fp32_out, label).numpy() fp32_acc = paddle.metric.accuracy(fp32_out, label).numpy()
with tempfile.TemporaryDirectory(prefix="qat_save_path_") as tmpdir: with tempfile.TemporaryDirectory(prefix="qat_save_path_") as tmpdir:
# save inference quantized model # save inference quantized model
...@@ -220,13 +218,13 @@ class TestImperativeQat(unittest.TestCase): ...@@ -220,13 +218,13 @@ class TestImperativeQat(unittest.TestCase):
place = core.CUDAPlace(0) place = core.CUDAPlace(0)
else: else:
place = core.CPUPlace() place = core.CPUPlace()
exe = fluid.Executor(place) exe = paddle.static.Executor(place)
[ [
inference_program, inference_program,
feed_target_names, feed_target_names,
fetch_targets, fetch_targets,
] = fluid.io.load_inference_model( ] = paddle.static.load_inference_model(
dirname=tmpdir, tmpdir,
executor=exe, executor=exe,
model_filename="lenet" + INFER_MODEL_SUFFIX, model_filename="lenet" + INFER_MODEL_SUFFIX,
params_filename="lenet" + INFER_PARAMS_SUFFIX, params_filename="lenet" + INFER_PARAMS_SUFFIX,
...@@ -237,8 +235,8 @@ class TestImperativeQat(unittest.TestCase): ...@@ -237,8 +235,8 @@ class TestImperativeQat(unittest.TestCase):
fetch_list=fetch_targets, fetch_list=fetch_targets,
) )
paddle.disable_static() paddle.disable_static()
quant_out = fluid.dygraph.to_variable(quant_out) quant_out = paddle.to_tensor(quant_out)
quant_acc = paddle.static.accuracy(quant_out, label).numpy() quant_acc = paddle.metric.accuracy(quant_out, label).numpy()
paddle.enable_static() paddle.enable_static()
delta_value = fp32_acc - quant_acc delta_value = fp32_acc - quant_acc
self.assertLessEqual(delta_value, self.diff_threshold) self.assertLessEqual(delta_value, self.diff_threshold)
......
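End to end, the QAT test above quantizes, trains, exports, and reloads; a skeletal version of that flow is sketched below. The save_quantized_model keyword names (layer/path/input_spec) and the quantize-type strings are assumed from the dygraph QAT documentation rather than shown in this diff.

import os

import paddle
from paddle.quantization import ImperativeQuantAware


def qat_and_export(model, save_dir):
    qat = ImperativeQuantAware(
        weight_quantize_type='abs_max',
        activation_quantize_type='moving_average_abs_max',
    )
    qat.quantize(model)  # inserts fake-quant layers in place

    # ... train and evaluate the instrumented model here ...

    # Export an inference model; this produces lenet.pdmodel / lenet.pdiparams,
    # matching the INFER_MODEL_SUFFIX / INFER_PARAMS_SUFFIX constants above.
    qat.save_quantized_model(
        layer=model,
        path=os.path.join(save_dir, "lenet"),
        input_spec=[
            paddle.static.InputSpec(shape=[None, 1, 28, 28], dtype='float32')
        ],
    )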
# copyright (c) 2018 paddlepaddle authors. all rights reserved. # copyright (c) 2022 paddlepaddle authors. all rights reserved.
# #
# licensed under the apache license, version 2.0 (the "license"); # licensed under the apache license, version 2.0 (the "license");
# you may not use this file except in compliance with the license. # you may not use this file except in compliance with the license.
...@@ -12,25 +12,25 @@ ...@@ -12,25 +12,25 @@
# see the license for the specific language governing permissions and # see the license for the specific language governing permissions and
# limitations under the license. # limitations under the license.
import logging
import os import os
import numpy as np import tempfile
import random
import shutil
import time import time
import unittest import unittest
import logging
import tempfile import numpy as np
from imperative_test_utils import ImperativeLenet
import paddle import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid.contrib.slim.quantization import ImperativeQuantAware
from paddle.fluid.log_helper import get_logger
from paddle.dataset.common import download from paddle.dataset.common import download
from imperative_test_utils import fix_model_dict, ImperativeLenet from paddle.framework import set_flags
from paddle.quantization import ImperativeQuantAware
from paddle.static.log_helper import get_logger
os.environ["CPU_NUM"] = "1" os.environ["CPU_NUM"] = "1"
if paddle.is_compiled_with_cuda(): if paddle.is_compiled_with_cuda():
fluid.set_flags({"FLAGS_cudnn_deterministic": True}) set_flags({"FLAGS_cudnn_deterministic": True})
_logger = get_logger( _logger = get_logger(
__name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s' __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s'
...@@ -117,7 +117,7 @@ class TestImperativeQatAmp(unittest.TestCase): ...@@ -117,7 +117,7 @@ class TestImperativeQatAmp(unittest.TestCase):
if use_amp: if use_amp:
with paddle.amp.auto_cast(): with paddle.amp.auto_cast():
out = model(img) out = model(img)
acc = paddle.static.accuracy(out, label) acc = paddle.metric.accuracy(out, label)
loss = paddle.nn.functional.cross_entropy( loss = paddle.nn.functional.cross_entropy(
out, label, reduction='none', use_softmax=False out, label, reduction='none', use_softmax=False
) )
...@@ -129,7 +129,7 @@ class TestImperativeQatAmp(unittest.TestCase): ...@@ -129,7 +129,7 @@ class TestImperativeQatAmp(unittest.TestCase):
adam.clear_gradients() adam.clear_gradients()
else: else:
out = model(img) out = model(img)
acc = paddle.static.accuracy(out, label) acc = paddle.metric.accuracy(out, label)
loss = paddle.nn.functional.cross_entropy( loss = paddle.nn.functional.cross_entropy(
out, label, reduction='none', use_softmax=False out, label, reduction='none', use_softmax=False
) )
...@@ -170,8 +170,8 @@ class TestImperativeQatAmp(unittest.TestCase): ...@@ -170,8 +170,8 @@ class TestImperativeQatAmp(unittest.TestCase):
with paddle.amp.auto_cast(use_amp): with paddle.amp.auto_cast(use_amp):
out = model(img) out = model(img)
acc_top1 = paddle.static.accuracy(input=out, label=label, k=1) acc_top1 = paddle.metric.accuracy(input=out, label=label, k=1)
acc_top5 = paddle.static.accuracy(input=out, label=label, k=5) acc_top5 = paddle.metric.accuracy(input=out, label=label, k=5)
acc_top1_list.append(float(acc_top1.numpy())) acc_top1_list.append(float(acc_top1.numpy()))
if batch_id % 100 == 0: if batch_id % 100 == 0:
......
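The AMP variant only wraps the forward pass in autocast; everything after the context manager runs in float32 as usual. A minimal sketch of that toggle (amp_forward is an illustrative name):

import paddle


def amp_forward(model, img, label, use_amp=True):
    # paddle.amp.auto_cast(enable) runs the enclosed ops in float16 where safe
    # when enable is True, and is a no-op when it is False.
    with paddle.amp.auto_cast(use_amp):
        out = model(img)
    acc = paddle.metric.accuracy(input=out, label=label, k=1)
    return out, acc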
# copyright (c) 2018 paddlepaddle authors. all rights reserved. # copyright (c) 2022 paddlepaddle authors. all rights reserved.
# #
# licensed under the apache license, version 2.0 (the "license"); # licensed under the apache license, version 2.0 (the "license");
# you may not use this file except in compliance with the license. # you may not use this file except in compliance with the license.
...@@ -13,27 +13,18 @@ ...@@ -13,27 +13,18 @@
# limitations under the license. # limitations under the license.
import os import os
import numpy as np
import random
import unittest import unittest
import logging
import paddle
import paddle.fluid as fluid
from paddle.fluid import core
from paddle.fluid.log_helper import get_logger
from test_imperative_qat import TestImperativeQat from test_imperative_qat import TestImperativeQat
import paddle
from paddle.framework import core, set_flags
paddle.enable_static() paddle.enable_static()
os.environ["CPU_NUM"] = "1" os.environ["CPU_NUM"] = "1"
if core.is_compiled_with_cuda(): if core.is_compiled_with_cuda():
fluid.set_flags({"FLAGS_cudnn_deterministic": True}) set_flags({"FLAGS_cudnn_deterministic": True})
_logger = get_logger(
__name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s'
)
class TestImperativeQatChannelWise(TestImperativeQat): class TestImperativeQatChannelWise(TestImperativeQat):
......
...@@ -13,27 +13,18 @@ ...@@ -13,27 +13,18 @@
# limitations under the license. # limitations under the license.
import os import os
import numpy as np
import random
import unittest import unittest
import logging
import paddle
import paddle.fluid as fluid
from paddle.fluid import core
from paddle.fluid.log_helper import get_logger
from test_imperative_qat import TestImperativeQat from test_imperative_qat import TestImperativeQat
import paddle
from paddle.framework import core, set_flags
paddle.enable_static() paddle.enable_static()
os.environ["CPU_NUM"] = "1" os.environ["CPU_NUM"] = "1"
if core.is_compiled_with_cuda(): if core.is_compiled_with_cuda():
fluid.set_flags({"FLAGS_cudnn_deterministic": True}) set_flags({"FLAGS_cudnn_deterministic": True})
_logger = get_logger(
__name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s'
)
class TestImperativeQatfuseBN(TestImperativeQat): class TestImperativeQatfuseBN(TestImperativeQat):
......
...@@ -12,57 +12,53 @@ ...@@ -12,57 +12,53 @@
# see the license for the specific language governing permissions and # see the license for the specific language governing permissions and
# limitations under the license. # limitations under the license.
import logging
import os import os
import numpy as np
import random
import time
import tempfile
import unittest import unittest
import logging
import numpy as np
from imperative_test_utils import fix_model_dict
import paddle import paddle
import paddle.fluid as fluid from paddle.framework import core, set_flags
from paddle.fluid import core from paddle.nn import (
from paddle.fluid.optimizer import ( BatchNorm2D,
SGDOptimizer, Conv2D,
AdamOptimizer, LeakyReLU,
MomentumOptimizer, Linear,
MaxPool2D,
PReLU,
ReLU,
Sequential,
Sigmoid,
Softmax,
) )
from paddle.fluid.contrib.slim.quantization import ImperativeQuantAware from paddle.quantization import ImperativeQuantAware
from paddle.nn import Sequential from paddle.static.log_helper import get_logger
from paddle.nn import ReLU, ReLU6, LeakyReLU, Sigmoid, Softmax, PReLU
from paddle.nn import Linear, Conv2D, Softmax, BatchNorm2D, MaxPool2D
from paddle.fluid.log_helper import get_logger
from paddle.jit.translated_layer import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX
from paddle.nn.quant.quant_layers import (
QuantizedConv2D,
QuantizedConv2DTranspose,
)
from imperative_test_utils import fix_model_dict
paddle.enable_static() paddle.enable_static()
os.environ["CPU_NUM"] = "1" os.environ["CPU_NUM"] = "1"
if core.is_compiled_with_cuda(): if core.is_compiled_with_cuda():
fluid.set_flags({"FLAGS_cudnn_deterministic": True}) set_flags({"FLAGS_cudnn_deterministic": True})
_logger = get_logger( _logger = get_logger(
__name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s' __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s'
) )
class ImperativeLenet(fluid.dygraph.Layer): class ImperativeLenet(paddle.nn.Layer):
def __init__(self, num_classes=10): def __init__(self, num_classes=10):
super().__init__() super().__init__()
conv2d_w1_attr = fluid.ParamAttr(name="conv2d_w_1") conv2d_w1_attr = paddle.ParamAttr(name="conv2d_w_1")
conv2d_w2_attr = fluid.ParamAttr(name="conv2d_w_2") conv2d_w2_attr = paddle.ParamAttr(name="conv2d_w_2")
fc_w1_attr = fluid.ParamAttr(name="fc_w_1") fc_w1_attr = paddle.ParamAttr(name="fc_w_1")
fc_w2_attr = fluid.ParamAttr(name="fc_w_2") fc_w2_attr = paddle.ParamAttr(name="fc_w_2")
fc_w3_attr = fluid.ParamAttr(name="fc_w_3") fc_w3_attr = paddle.ParamAttr(name="fc_w_3")
conv2d_b2_attr = fluid.ParamAttr(name="conv2d_b_2") conv2d_b2_attr = paddle.ParamAttr(name="conv2d_b_2")
fc_b1_attr = fluid.ParamAttr(name="fc_b_1") fc_b1_attr = paddle.ParamAttr(name="fc_b_1")
fc_b2_attr = fluid.ParamAttr(name="fc_b_2") fc_b2_attr = paddle.ParamAttr(name="fc_b_2")
fc_b3_attr = fluid.ParamAttr(name="fc_b_3") fc_b3_attr = paddle.ParamAttr(name="fc_b_3")
self.features = Sequential( self.features = Sequential(
Conv2D( Conv2D(
in_channels=1, in_channels=1,
...@@ -116,7 +112,7 @@ class ImperativeLenet(fluid.dygraph.Layer): ...@@ -116,7 +112,7 @@ class ImperativeLenet(fluid.dygraph.Layer):
def forward(self, inputs): def forward(self, inputs):
x = self.features(inputs) x = self.features(inputs)
x = paddle.flatten(x, 1, -1) x = paddle.flatten(x, 1)
x = self.fc(x) x = self.fc(x)
return x return x
...@@ -139,14 +135,14 @@ class TestImperativeQatLSQ(unittest.TestCase): ...@@ -139,14 +135,14 @@ class TestImperativeQatLSQ(unittest.TestCase):
seed = 100 seed = 100
np.random.seed(seed) np.random.seed(seed)
fluid.default_main_program().random_seed = seed paddle.static.default_main_program().random_seed = seed
fluid.default_startup_program().random_seed = seed paddle.static.default_startup_program().random_seed = seed
paddle.disable_static() paddle.disable_static()
lenet = ImperativeLenet() lenet = ImperativeLenet()
lenet = fix_model_dict(lenet) lenet = fix_model_dict(lenet)
imperative_qat.quantize(lenet) imperative_qat.quantize(lenet)
optimizer = MomentumOptimizer( optimizer = paddle.optimizer.Momentum(
learning_rate=0.1, parameter_list=lenet.parameters(), momentum=0.9 learning_rate=0.1, parameters=lenet.parameters(), momentum=0.9
) )
train_reader = paddle.batch( train_reader = paddle.batch(
...@@ -166,10 +162,10 @@ class TestImperativeQatLSQ(unittest.TestCase): ...@@ -166,10 +162,10 @@ class TestImperativeQatLSQ(unittest.TestCase):
.reshape(-1, 1) .reshape(-1, 1)
) )
img = fluid.dygraph.to_variable(x_data) img = paddle.to_tensor(x_data)
label = fluid.dygraph.to_variable(y_data) label = paddle.to_tensor(y_data)
out = lenet(img) out = lenet(img)
acc = paddle.static.accuracy(out, label) acc = paddle.metric.accuracy(out, label)
loss = paddle.nn.functional.cross_entropy( loss = paddle.nn.functional.cross_entropy(
out, label, reduction='none', use_softmax=False out, label, reduction='none', use_softmax=False
) )
...@@ -199,14 +195,14 @@ class TestImperativeQatLSQ(unittest.TestCase): ...@@ -199,14 +195,14 @@ class TestImperativeQatLSQ(unittest.TestCase):
.astype('int64') .astype('int64')
.reshape(-1, 1) .reshape(-1, 1)
) )
img = fluid.dygraph.to_variable(x_data) img = paddle.to_tensor(x_data)
label = fluid.dygraph.to_variable(y_data) label = paddle.to_tensor(y_data)
out = lenet(img) out = lenet(img)
acc_top1 = paddle.static.accuracy( acc_top1 = paddle.metric.accuracy(
input=out, label=label, k=1 input=out, label=label, k=1
) )
acc_top5 = paddle.static.accuracy( acc_top5 = paddle.metric.accuracy(
input=out, label=label, k=5 input=out, label=label, k=5
) )
......
...@@ -12,57 +12,55 @@ ...@@ -12,57 +12,55 @@
# see the license for the specific language governing permissions and # see the license for the specific language governing permissions and
# limitations under the license. # limitations under the license.
import logging
import os import os
import numpy as np
import random
import time
import tempfile
import unittest import unittest
import logging
import numpy as np
from imperative_test_utils import fix_model_dict
import paddle import paddle
import paddle.fluid as fluid from paddle.framework import core, set_flags
from paddle.fluid import core from paddle.nn import (
from paddle.fluid.optimizer import ( BatchNorm2D,
SGDOptimizer, Conv2D,
AdamOptimizer, LeakyReLU,
MomentumOptimizer, Linear,
MaxPool2D,
PReLU,
ReLU,
Sequential,
Sigmoid,
Softmax,
) )
from paddle.fluid.contrib.slim.quantization import ImperativeQuantAware from paddle.nn.quant.quant_layers import QuantizedMatmul
from paddle.nn import Sequential from paddle.optimizer import Momentum
from paddle.nn import ReLU, ReLU6, LeakyReLU, Sigmoid, Softmax, PReLU from paddle.quantization import ImperativeQuantAware
from paddle.nn import Linear, Conv2D, Softmax, BatchNorm2D, MaxPool2D from paddle.static.log_helper import get_logger
from paddle.fluid.log_helper import get_logger
from paddle.jit.translated_layer import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX
from paddle.nn.quant.quant_layers import (
QuantizedConv2D,
QuantizedMatmul,
)
from imperative_test_utils import fix_model_dict
paddle.enable_static() paddle.enable_static()
os.environ["CPU_NUM"] = "1" os.environ["CPU_NUM"] = "1"
if core.is_compiled_with_cuda(): if core.is_compiled_with_cuda():
fluid.set_flags({"FLAGS_cudnn_deterministic": True}) set_flags({"FLAGS_cudnn_deterministic": True})
_logger = get_logger( _logger = get_logger(
__name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s' __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s'
) )
class ImperativeLenet(fluid.dygraph.Layer): class ImperativeLenet(paddle.nn.Layer):
def __init__(self, num_classes=10): def __init__(self, num_classes=10):
super().__init__() super().__init__()
conv2d_w1_attr = fluid.ParamAttr(name="conv2d_w_1") conv2d_w1_attr = paddle.ParamAttr(name="conv2d_w_1")
conv2d_w2_attr = fluid.ParamAttr(name="conv2d_w_2") conv2d_w2_attr = paddle.ParamAttr(name="conv2d_w_2")
fc_w1_attr = fluid.ParamAttr(name="fc_w_1") fc_w1_attr = paddle.ParamAttr(name="fc_w_1")
fc_w2_attr = fluid.ParamAttr(name="fc_w_2") fc_w2_attr = paddle.ParamAttr(name="fc_w_2")
fc_w3_attr = fluid.ParamAttr(name="fc_w_3") fc_w3_attr = paddle.ParamAttr(name="fc_w_3")
conv2d_b2_attr = fluid.ParamAttr(name="conv2d_b_2") conv2d_b2_attr = paddle.ParamAttr(name="conv2d_b_2")
fc_b1_attr = fluid.ParamAttr(name="fc_b_1") fc_b1_attr = paddle.ParamAttr(name="fc_b_1")
fc_b2_attr = fluid.ParamAttr(name="fc_b_2") fc_b2_attr = paddle.ParamAttr(name="fc_b_2")
fc_b3_attr = fluid.ParamAttr(name="fc_b_3") fc_b3_attr = paddle.ParamAttr(name="fc_b_3")
self.features = Sequential( self.features = Sequential(
Conv2D( Conv2D(
in_channels=1, in_channels=1,
...@@ -140,15 +138,15 @@ class TestImperativeQatMatmul(unittest.TestCase): ...@@ -140,15 +138,15 @@ class TestImperativeQatMatmul(unittest.TestCase):
seed = 100 seed = 100
np.random.seed(seed) np.random.seed(seed)
fluid.default_main_program().random_seed = seed paddle.static.default_main_program().random_seed = seed
fluid.default_startup_program().random_seed = seed paddle.static.default_startup_program().random_seed = seed
paddle.disable_static() paddle.disable_static()
lenet = ImperativeLenet() lenet = ImperativeLenet()
lenet = fix_model_dict(lenet) lenet = fix_model_dict(lenet)
imperative_qat.quantize(lenet) imperative_qat.quantize(lenet)
optimizer = MomentumOptimizer( optimizer = Momentum(
learning_rate=0.1, parameter_list=lenet.parameters(), momentum=0.9 learning_rate=0.1, parameters=lenet.parameters(), momentum=0.9
) )
train_reader = paddle.batch( train_reader = paddle.batch(
...@@ -168,18 +166,18 @@ class TestImperativeQatMatmul(unittest.TestCase): ...@@ -168,18 +166,18 @@ class TestImperativeQatMatmul(unittest.TestCase):
.reshape(-1, 1) .reshape(-1, 1)
) )
img = fluid.dygraph.to_variable(x_data) img = paddle.to_tensor(x_data)
label = fluid.dygraph.to_variable(y_data) label = paddle.to_tensor(y_data)
out = lenet(img) out = lenet(img)
acc = paddle.static.accuracy(out, label) acc = paddle.metric.accuracy(out, label)
loss = paddle.nn.functional.cross_entropy( loss = paddle.nn.functional.cross_entropy(
out, label, reduction='none', use_softmax=False out, label, reduction='none', use_softmax=False
) )
avg_loss = paddle.mean(loss) avg_loss = paddle.mean(loss)
avg_loss.backward() avg_loss.backward()
optimizer.minimize(avg_loss) optimizer.step()
lenet.clear_gradients() optimizer.clear_grad()
if batch_id % 100 == 0: if batch_id % 100 == 0:
_logger.info( _logger.info(
...@@ -201,14 +199,14 @@ class TestImperativeQatMatmul(unittest.TestCase): ...@@ -201,14 +199,14 @@ class TestImperativeQatMatmul(unittest.TestCase):
.astype('int64') .astype('int64')
.reshape(-1, 1) .reshape(-1, 1)
) )
img = fluid.dygraph.to_variable(x_data) img = paddle.to_tensor(x_data)
label = fluid.dygraph.to_variable(y_data) label = paddle.to_tensor(y_data)
out = lenet(img) out = lenet(img)
acc_top1 = paddle.static.accuracy( acc_top1 = paddle.metric.accuracy(
input=out, label=label, k=1 input=out, label=label, k=1
) )
acc_top5 = paddle.static.accuracy( acc_top5 = paddle.metric.accuracy(
input=out, label=label, k=5 input=out, label=label, k=5
) )
......
...@@ -12,20 +12,19 @@ ...@@ -12,20 +12,19 @@
# see the license for the specific language governing permissions and # see the license for the specific language governing permissions and
# limitations under the license. # limitations under the license.
import logging
import os import os
import numpy as np
import random
import unittest import unittest
import logging
import numpy as np
import paddle import paddle
import paddle.nn as nn import paddle.nn as nn
from paddle.optimizer import Adam from paddle.fluid.framework import _test_eager_guard
from paddle.fluid.contrib.slim.quantization import ImperativeQuantAware
from paddle.fluid.contrib.slim.quantization import QuantizationTransformPass
from paddle.nn import Sequential from paddle.nn import Sequential
from paddle.nn import Linear from paddle.optimizer import Adam
from paddle.nn.quant.quant_layers import QuantizedConv2DTranspose from paddle.quantization import ImperativeQuantAware
from paddle.fluid.log_helper import get_logger from paddle.static.log_helper import get_logger
os.environ["CPU_NUM"] = "1" os.environ["CPU_NUM"] = "1"
...@@ -110,7 +109,7 @@ class ModelForConv2dT(nn.Layer): ...@@ -110,7 +109,7 @@ class ModelForConv2dT(nn.Layer):
def __init__(self, num_classes=10): def __init__(self, num_classes=10):
super().__init__() super().__init__()
self.features = nn.Conv2DTranspose(4, 6, (3, 3)) self.features = nn.Conv2DTranspose(4, 6, (3, 3))
self.fc = Linear(600, num_classes) self.fc = nn.Linear(in_features=600, out_features=num_classes)
def forward(self, inputs): def forward(self, inputs):
x = self.features(inputs) x = self.features(inputs)
...@@ -123,28 +122,28 @@ class ImperativeLenet(paddle.nn.Layer): ...@@ -123,28 +122,28 @@ class ImperativeLenet(paddle.nn.Layer):
def __init__(self, num_classes=10, classifier_activation='softmax'): def __init__(self, num_classes=10, classifier_activation='softmax'):
super().__init__() super().__init__()
self.features = Sequential( self.features = Sequential(
paddle.nn.Conv2D( nn.Conv2D(
in_channels=1, in_channels=1,
out_channels=6, out_channels=6,
kernel_size=3, kernel_size=3,
stride=1, stride=1,
padding=1, padding=1,
), ),
paddle.nn.MaxPool2D(kernel_size=2, stride=2), nn.MaxPool2D(kernel_size=2, stride=2),
paddle.nn.Conv2D( nn.Conv2D(
in_channels=6, in_channels=6,
out_channels=16, out_channels=16,
kernel_size=5, kernel_size=5,
stride=1, stride=1,
padding=0, padding=0,
), ),
paddle.nn.MaxPool2D(kernel_size=2, stride=2), nn.MaxPool2D(kernel_size=2, stride=2),
) )
self.fc = Sequential( self.fc = Sequential(
Linear(400, 120), nn.Linear(in_features=400, out_features=120),
Linear(120, 84), nn.Linear(in_features=120, out_features=84),
Linear(84, num_classes), nn.Linear(in_features=84, out_features=num_classes),
) )
def forward(self, inputs): def forward(self, inputs):
...@@ -160,7 +159,7 @@ class TestUserDefinedActPreprocess(unittest.TestCase): ...@@ -160,7 +159,7 @@ class TestUserDefinedActPreprocess(unittest.TestCase):
_logger.info("test act_preprocess") _logger.info("test act_preprocess")
self.imperative_qat = ImperativeQuantAware(act_preprocess_layer=PACT) self.imperative_qat = ImperativeQuantAware(act_preprocess_layer=PACT)
def test_quant_aware_training(self): def func_quant_aware_training(self):
imperative_qat = self.imperative_qat imperative_qat = self.imperative_qat
seed = 1 seed = 1
np.random.seed(seed) np.random.seed(seed)
...@@ -170,8 +169,8 @@ class TestUserDefinedActPreprocess(unittest.TestCase): ...@@ -170,8 +169,8 @@ class TestUserDefinedActPreprocess(unittest.TestCase):
fixed_state = {} fixed_state = {}
param_init_map = {} param_init_map = {}
for name, param in lenet.named_parameters(): for name, param in lenet.named_parameters():
p_shape = param.numpy().shape p_shape = np.array(param).shape
p_value = param.numpy() p_value = np.array(param)
if name.endswith("bias"): if name.endswith("bias"):
value = np.zeros_like(p_value).astype('float32') value = np.zeros_like(p_value).astype('float32')
else: else:
...@@ -217,8 +216,8 @@ class TestUserDefinedActPreprocess(unittest.TestCase): ...@@ -217,8 +216,8 @@ class TestUserDefinedActPreprocess(unittest.TestCase):
loss = nn.functional.loss.cross_entropy(out, label) loss = nn.functional.loss.cross_entropy(out, label)
avg_loss = paddle.mean(loss) avg_loss = paddle.mean(loss)
avg_loss.backward() avg_loss.backward()
adam.minimize(avg_loss) adam.step()
model.clear_gradients() adam.clear_grad()
if batch_id % 50 == 0: if batch_id % 50 == 0:
_logger.info( _logger.info(
"Train | At epoch {} step {}: loss = {:}, acc= {:}".format( "Train | At epoch {} step {}: loss = {:}, acc= {:}".format(
...@@ -262,6 +261,11 @@ class TestUserDefinedActPreprocess(unittest.TestCase): ...@@ -262,6 +261,11 @@ class TestUserDefinedActPreprocess(unittest.TestCase):
train(lenet) train(lenet)
test(lenet) test(lenet)
def test_quant_aware_training(self):
with _test_eager_guard():
self.func_quant_aware_training()
self.func_quant_aware_training()
class TestUserDefinedWeightPreprocess(TestUserDefinedActPreprocess): class TestUserDefinedWeightPreprocess(TestUserDefinedActPreprocess):
def setUp(self): def setUp(self):
......
# copyright (c) 2018 paddlepaddle authors. all rights reserved. # copyright (c) 2022 paddlepaddle authors. all rights reserved.
# #
# licensed under the apache license, version 2.0 (the "license"); # licensed under the apache license, version 2.0 (the "license");
# you may not use this file except in compliance with the license. # you may not use this file except in compliance with the license.
...@@ -13,34 +13,25 @@ ...@@ -13,34 +13,25 @@
# limitations under the license. # limitations under the license.
import os import os
import numpy as np
import random
import unittest import unittest
import logging
import paddle
import paddle.fluid as fluid
import paddle.fluid.layers as layers
from paddle.fluid import core
from paddle.fluid.optimizer import AdamOptimizer
from paddle.fluid.contrib.slim.quantization import ImperativeQuantAware
from paddle.jit.translated_layer import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX
from paddle.nn.layer import ReLU, LeakyReLU, Sigmoid, Softmax, ReLU6
from paddle.nn import Linear, Conv2D, Softmax, BatchNorm
from paddle.fluid.log_helper import get_logger
import numpy as np
from imperative_test_utils import ( from imperative_test_utils import (
ImperativeLenetWithSkipQuant,
fix_model_dict, fix_model_dict,
train_lenet, train_lenet,
ImperativeLenetWithSkipQuant,
) )
import paddle
from paddle.framework import core, set_flags
from paddle.optimizer import Adam
from paddle.quantization import ImperativeQuantAware
INFER_MODEL_SUFFIX = ".pdmodel"
INFER_PARAMS_SUFFIX = ".pdiparams"
os.environ["CPU_NUM"] = "1" os.environ["CPU_NUM"] = "1"
if core.is_compiled_with_cuda(): if core.is_compiled_with_cuda():
fluid.set_flags({"FLAGS_cudnn_deterministic": True}) set_flags({"FLAGS_cudnn_deterministic": True})
_logger = get_logger(
__name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s'
)
class TestImperativeOutSclae(unittest.TestCase): class TestImperativeOutSclae(unittest.TestCase):
...@@ -60,9 +51,7 @@ class TestImperativeOutSclae(unittest.TestCase): ...@@ -60,9 +51,7 @@ class TestImperativeOutSclae(unittest.TestCase):
lenet = fix_model_dict(lenet) lenet = fix_model_dict(lenet)
qat.quantize(lenet) qat.quantize(lenet)
adam = AdamOptimizer( adam = Adam(learning_rate=lr, parameters=lenet.parameters())
learning_rate=lr, parameter_list=lenet.parameters()
)
dynamic_loss_rec = [] dynamic_loss_rec = []
lenet.train() lenet.train()
loss_list = train_lenet(lenet, reader, adam) loss_list = train_lenet(lenet, reader, adam)
...@@ -88,14 +77,14 @@ class TestImperativeOutSclae(unittest.TestCase): ...@@ -88,14 +77,14 @@ class TestImperativeOutSclae(unittest.TestCase):
place = core.CUDAPlace(0) place = core.CUDAPlace(0)
else: else:
place = core.CPUPlace() place = core.CPUPlace()
exe = fluid.Executor(place) exe = paddle.static.Executor(place)
[ [
inference_program, inference_program,
feed_target_names, feed_target_names,
fetch_targets, fetch_targets,
] = fluid.io.load_inference_model( ] = paddle.static.load_inference_model(
dirname=save_dir, save_dir,
executor=exe, executor=exe,
model_filename="lenet" + INFER_MODEL_SUFFIX, model_filename="lenet" + INFER_MODEL_SUFFIX,
params_filename="lenet" + INFER_PARAMS_SUFFIX, params_filename="lenet" + INFER_PARAMS_SUFFIX,
......
...@@ -13,12 +13,12 @@ ...@@ -13,12 +13,12 @@
# limitations under the License. # limitations under the License.
import unittest import unittest
import numpy as np import numpy as np
import paddle import paddle
import paddle
import paddle.fluid as fluid
from paddle.fluid import core
import paddle.nn.quant.quant_layers as quant_layers import paddle.nn.quant.quant_layers as quant_layers
from paddle.framework import core
paddle.enable_static() paddle.enable_static()
...@@ -38,23 +38,23 @@ def init_data(batch_size=32, img_shape=[784], label_range=9): ...@@ -38,23 +38,23 @@ def init_data(batch_size=32, img_shape=[784], label_range=9):
class TestMovingAverageAbsMaxScaleOp(unittest.TestCase): class TestMovingAverageAbsMaxScaleOp(unittest.TestCase):
def check_backward(self, use_cuda): def check_backward(self, use_cuda):
main_program = fluid.Program() main_program = paddle.static.Program()
startup_program = fluid.Program() startup_program = paddle.static.Program()
with fluid.program_guard(main_program, startup_program): with paddle.static.program_guard(main_program, startup_program):
image = fluid.layers.data( image = paddle.static.data(
name='image', shape=[784], dtype='float32' name='image', shape=[-1, 784], dtype='float32'
) )
label = fluid.layers.data(name='label', shape=[1], dtype='int64') label = paddle.static.data(
fc_tmp = fluid.layers.fc(image, size=10, act='softmax') name='label', shape=[-1, 1], dtype='int64'
)
fc_tmp = paddle.static.nn.fc(image, size=10, activation='softmax')
out_scale = quant_layers.MovingAverageAbsMaxScale( out_scale = quant_layers.MovingAverageAbsMaxScale(
name=fc_tmp.name, dtype=fc_tmp.dtype name=fc_tmp.name, dtype=fc_tmp.dtype
) )
fc_tmp_1 = out_scale(fc_tmp) fc_tmp_1 = out_scale(fc_tmp)
cross_entropy = paddle.nn.functional.softmax_with_cross_entropy( cross_entropy = paddle.nn.functional.cross_entropy(fc_tmp, label)
fc_tmp, label
)
loss = paddle.mean(cross_entropy) loss = paddle.mean(cross_entropy)
sgd = fluid.optimizer.SGD(learning_rate=1e-3) sgd = paddle.optimizer.SGD(learning_rate=1e-3)
sgd.minimize(loss) sgd.minimize(loss)
moving_average_abs_max_scale_ops = [ moving_average_abs_max_scale_ops = [
...@@ -66,13 +66,13 @@ class TestMovingAverageAbsMaxScaleOp(unittest.TestCase): ...@@ -66,13 +66,13 @@ class TestMovingAverageAbsMaxScaleOp(unittest.TestCase):
len(moving_average_abs_max_scale_ops) == 1 len(moving_average_abs_max_scale_ops) == 1
), "The number of moving_average_abs_max_scale_ops should be 1." ), "The number of moving_average_abs_max_scale_ops should be 1."
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace()
exe = fluid.Executor(place) exe = paddle.static.Executor(place)
exe.run(startup_program) exe.run(startup_program)
binary = fluid.compiler.CompiledProgram( binary = paddle.static.CompiledProgram(main_program).with_data_parallel(
main_program loss_name=loss.name
).with_data_parallel(loss_name=loss.name) )
img, label = init_data() img, label = init_data()
feed_dict = {"image": img, "label": label} feed_dict = {"image": img, "label": label}
......
This diff has been collapsed.
This diff has been collapsed.
...@@ -486,7 +486,7 @@ def get_filenames(full_test=False): ...@@ -486,7 +486,7 @@ def get_filenames(full_test=False):
''' '''
global whl_error global whl_error
import paddle # noqa: F401 import paddle # noqa: F401
import paddle.fluid.contrib.slim.quantization # noqa: F401 import paddle.static.quantization # noqa: F401
whl_error = [] whl_error = []
if full_test: if full_test:
......
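For reference, a minimal sketch of the relocated dygraph QAT entry point that the hunks above switch to (from paddle.quantization rather than paddle.fluid.contrib.slim.quantization). The model below is a hypothetical placeholder, not code from this PR; any paddle.nn.Layer would work the same way:

import paddle
from paddle.optimizer import Adam
from paddle.quantization import ImperativeQuantAware  # new import path used in the hunks above

# Hypothetical toy model standing in for the LeNet used by the tests.
model = paddle.nn.Sequential(
    paddle.nn.Conv2D(in_channels=1, out_channels=6, kernel_size=3, stride=1, padding=1),
    paddle.nn.Flatten(),
    paddle.nn.Linear(in_features=6 * 28 * 28, out_features=10),
)

# Insert fake-quant layers in place, mirroring qat.quantize(lenet) in the tests.
qat = ImperativeQuantAware()
qat.quantize(model)

# Train with the 2.x optimizer API (adam.step() / adam.clear_grad()), as the diff does.
adam = Adam(learning_rate=1e-3, parameters=model.parameters())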