Unverified commit f2f9b0c2 authored by Leo Chen, committed by GitHub

Move QAT API from PaddleSlim to Paddle (#53591)

* Copy QAT files from PaddleSlim

* Integrate QAT API into Paddle

* Replace eval function

* Reduce test_quant_aware run time

* Apply new formatter on modified files

* Remove the Paddle version check

* Copy quant_post_quant_aware UT from PaddleSlim

* Integrate test_quant_post_quant_aware UT into PaddlePaddle

* Apply new formatter on modified files

* Remove redundant code and add unittests

* Add new unittests

* Update the time limit of new unittests
Parent da50a009
@@ -64,3 +64,7 @@ from .post_training_quantization import (
from .post_training_quantization import (
    WeightQuantization,
)
from .quanter import (
    quant_aware,
    convert,
)
This diff is collapsed.
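For context, the re-exported entry points can be driven end to end as below. This is a minimal hedged sketch, assuming only the quant_aware/convert API exported above and the config keys the new unittests exercise; the toy fc model is illustrative and not part of this commit.

import paddle
from paddle.static.quantization.quanter import convert, quant_aware

paddle.enable_static()

# Build a toy static-graph model (illustrative only).
image = paddle.static.data(name='image', shape=[None, 16], dtype='float32')
out = paddle.static.nn.fc(image, 10)

main_prog = paddle.static.default_main_program()
val_prog = main_prog.clone(for_test=True)

place = (
    paddle.CUDAPlace(0)
    if paddle.is_compiled_with_cuda()
    else paddle.CPUPlace()
)
exe = paddle.static.Executor(place)
exe.run(paddle.static.default_startup_program())

# Config keys mirror those used by the unittests in this change.
config = {
    'weight_quantize_type': 'channel_wise_abs_max',
    'activation_quantize_type': 'moving_average_abs_max',
    'quantize_op_types': ['depthwise_conv2d', 'mul', 'conv2d'],
}

# Insert fake-quant ops for training and evaluation, then fold the
# collected scales into an inference program.
quant_train_prog = quant_aware(main_prog, place, config, for_test=False)
quant_eval_prog = quant_aware(val_prog, place, config, for_test=True)
# ... fine-tune quant_train_prog here ...
infer_prog = convert(quant_eval_prog, place, config)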
@@ -227,6 +227,10 @@ if(WIN32)
  list(REMOVE_ITEM TEST_OPS test_imperative_qat_amp)
  list(REMOVE_ITEM TEST_OPS test_imperative_qat_lsq)
  list(REMOVE_ITEM TEST_OPS test_imperative_qat_matmul)
  list(REMOVE_ITEM TEST_OPS test_quant_aware)
  list(REMOVE_ITEM TEST_OPS test_quant_post_quant_aware)
  list(REMOVE_ITEM TEST_OPS test_quant_aware_user_defined)
  list(REMOVE_ITEM TEST_OPS test_quant_aware_config)
endif()
@@ -484,6 +488,10 @@ if(NOT WIN32)
  set_tests_properties(test_imperative_ptq PROPERTIES TIMEOUT 120)
  set_tests_properties(test_weight_quantization_mobilenetv1 PROPERTIES TIMEOUT
                       120)
  set_tests_properties(test_quant_aware PROPERTIES TIMEOUT 900)
  set_tests_properties(test_quant_post_quant_aware PROPERTIES TIMEOUT 900)
  set_tests_properties(test_quant_aware_user_defined PROPERTIES TIMEOUT 900)
  set_tests_properties(test_quant_aware_config PROPERTIES TIMEOUT 900)
endif()
set_tests_properties(test_graph PROPERTIES TIMEOUT 120)
...
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import unittest
import numpy as np
import paddle
from paddle.nn.initializer import KaimingUniform
from paddle.static.quantization.quanter import convert, quant_aware

train_parameters = {
"input_size": [3, 224, 224],
"input_mean": [0.485, 0.456, 0.406],
"input_std": [0.229, 0.224, 0.225],
"learning_strategy": {
"name": "piecewise_decay",
"batch_size": 256,
"epochs": [10, 16, 30],
"steps": [0.1, 0.01, 0.001, 0.0001],
},
}


class MobileNet:
def __init__(self):
self.params = train_parameters
def net(self, input, class_dim=1000, scale=1.0):
# conv1: 112x112
input = self.conv_bn_layer(
input,
filter_size=3,
channels=3,
num_filters=int(32 * scale),
stride=2,
padding=1,
name="conv1",
)
# 56x56
input = self.depthwise_separable(
input,
num_filters1=32,
num_filters2=64,
num_groups=32,
stride=1,
scale=scale,
name="conv2_1",
)
input = self.depthwise_separable(
input,
num_filters1=64,
num_filters2=128,
num_groups=64,
stride=2,
scale=scale,
name="conv2_2",
)
# 28x28
input = self.depthwise_separable(
input,
num_filters1=128,
num_filters2=128,
num_groups=128,
stride=1,
scale=scale,
name="conv3_1",
)
input = self.depthwise_separable(
input,
num_filters1=128,
num_filters2=256,
num_groups=128,
stride=2,
scale=scale,
name="conv3_2",
)
# 14x14
input = self.depthwise_separable(
input,
num_filters1=256,
num_filters2=256,
num_groups=256,
stride=1,
scale=scale,
name="conv4_1",
)
input = self.depthwise_separable(
input,
num_filters1=256,
num_filters2=512,
num_groups=256,
stride=2,
scale=scale,
name="conv4_2",
)
# 14x14
for i in range(5):
input = self.depthwise_separable(
input,
num_filters1=512,
num_filters2=512,
num_groups=512,
stride=1,
scale=scale,
name="conv5" + "_" + str(i + 1),
)
# 7x7
input = self.depthwise_separable(
input,
num_filters1=512,
num_filters2=1024,
num_groups=512,
stride=2,
scale=scale,
name="conv5_6",
)
input = self.depthwise_separable(
input,
num_filters1=1024,
num_filters2=1024,
num_groups=1024,
stride=1,
scale=scale,
name="conv6",
)
input = paddle.nn.functional.adaptive_avg_pool2d(input, 1)
with paddle.static.name_scope('last_fc'):
output = paddle.static.nn.fc(
input,
class_dim,
weight_attr=paddle.ParamAttr(
initializer=KaimingUniform(), name="fc7_weights"
),
bias_attr=paddle.ParamAttr(name="fc7_offset"),
)
return output
def conv_bn_layer(
self,
input,
filter_size,
num_filters,
stride,
padding,
channels=None,
num_groups=1,
act='relu',
use_cudnn=True,
name=None,
):
conv = paddle.static.nn.conv2d(
input=input,
num_filters=num_filters,
filter_size=filter_size,
stride=stride,
padding=padding,
groups=num_groups,
act=None,
use_cudnn=use_cudnn,
param_attr=paddle.ParamAttr(
initializer=KaimingUniform(), name=name + "_weights"
),
bias_attr=False,
)
bn_name = name + "_bn"
return paddle.static.nn.batch_norm(
input=conv,
act=act,
param_attr=paddle.ParamAttr(name=bn_name + "_scale"),
bias_attr=paddle.ParamAttr(name=bn_name + "_offset"),
moving_mean_name=bn_name + '_mean',
moving_variance_name=bn_name + '_variance',
)
def depthwise_separable(
self,
input,
num_filters1,
num_filters2,
num_groups,
stride,
scale,
name=None,
):
depthwise_conv = self.conv_bn_layer(
input=input,
filter_size=3,
num_filters=int(num_filters1 * scale),
stride=stride,
padding=1,
num_groups=int(num_groups * scale),
use_cudnn=False,
name=name + "_dw",
)
pointwise_conv = self.conv_bn_layer(
input=depthwise_conv,
filter_size=1,
num_filters=int(num_filters2 * scale),
stride=1,
padding=0,
name=name + "_sep",
)
return pointwise_conv


class StaticCase(unittest.TestCase):
def setUp(self):
# switch mode
paddle.enable_static()


class TestQuantAwareCase(StaticCase):
def test_accuracy(self):
image = paddle.static.data(
name='image', shape=[None, 1, 28, 28], dtype='float32'
)
label = paddle.static.data(name='label', shape=[None, 1], dtype='int64')
model = MobileNet()
out = model.net(input=image, class_dim=10)
cost = paddle.nn.functional.loss.cross_entropy(input=out, label=label)
avg_cost = paddle.mean(x=cost)
acc_top1 = paddle.metric.accuracy(input=out, label=label, k=1)
acc_top5 = paddle.metric.accuracy(input=out, label=label, k=5)
optimizer = paddle.optimizer.Momentum(
momentum=0.9,
learning_rate=0.01,
weight_decay=paddle.regularizer.L2Decay(4e-5),
)
optimizer.minimize(avg_cost)
main_prog = paddle.static.default_main_program()
val_prog = paddle.static.default_main_program().clone(for_test=True)
place = (
paddle.CUDAPlace(0)
if paddle.is_compiled_with_cuda()
else paddle.CPUPlace()
)
exe = paddle.static.Executor(place)
exe.run(paddle.static.default_startup_program())
def transform(x):
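            # the 'cv2' backend yields 28x28 arrays; prepend the channel dim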
return np.reshape(x, [1, 28, 28])
train_dataset = paddle.vision.datasets.MNIST(
mode='train', backend='cv2', transform=transform
)
test_dataset = paddle.vision.datasets.MNIST(
mode='test', backend='cv2', transform=transform
)
batch_size = 64 if os.environ.get('DATASET') == 'full' else 8
train_loader = paddle.io.DataLoader(
train_dataset,
places=place,
feed_list=[image, label],
drop_last=True,
return_list=False,
batch_size=batch_size,
)
valid_loader = paddle.io.DataLoader(
test_dataset,
places=place,
feed_list=[image, label],
batch_size=batch_size,
return_list=False,
)
def train(program):
iter = 0
stop_iter = None if os.environ.get('DATASET') == 'full' else 10
for data in train_loader():
cost, top1, top5 = exe.run(
program,
feed=data,
fetch_list=[avg_cost, acc_top1, acc_top5],
)
iter += 1
if iter % 100 == 0:
print(
'train iter={}, avg loss {}, acc_top1 {}, acc_top5 {}'.format(
iter, cost, top1, top5
)
)
if stop_iter is not None and iter == stop_iter:
break
def test(program):
iter = 0
stop_iter = None if os.environ.get('DATASET') == 'full' else 10
result = [[], [], []]
for data in valid_loader():
cost, top1, top5 = exe.run(
program,
feed=data,
fetch_list=[avg_cost, acc_top1, acc_top5],
)
iter += 1
if iter % 100 == 0:
print(
'eval iter={}, avg loss {}, acc_top1 {}, acc_top5 {}'.format(
iter, cost, top1, top5
)
)
result[0].append(cost)
result[1].append(top1)
result[2].append(top5)
if stop_iter is not None and iter == stop_iter:
break
print(
' avg loss {}, acc_top1 {}, acc_top5 {}'.format(
np.mean(result[0]), np.mean(result[1]), np.mean(result[2])
)
)
return np.mean(result[1]), np.mean(result[2])
train(main_prog)
top1_1, top5_1 = test(main_prog)
config = {
'weight_quantize_type': 'channel_wise_abs_max',
'activation_quantize_type': 'moving_average_abs_max',
'quantize_op_types': ['depthwise_conv2d', 'mul', 'conv2d'],
}
quant_train_prog = quant_aware(main_prog, place, config, for_test=False)
quant_eval_prog = quant_aware(val_prog, place, config, for_test=True)
op_nums_1, quant_op_nums_1 = self.get_op_number(quant_eval_prog)
# test quant_aware op numbers
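        # (each quantized op gains two quantize_linear inputs: one for the
        # activation and one for the weight, hence the factor of 2)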
self.assertEqual(op_nums_1 * 2, quant_op_nums_1)
train(quant_train_prog)
convert_eval_prog = convert(quant_eval_prog, place, config)
top1_2, top5_2 = test(convert_eval_prog)
# values before quantization and after quantization should be close
print(f"before quantization: top1: {top1_1}, top5: {top5_1}")
print(f"after quantization: top1: {top1_2}, top5: {top5_2}")
convert_op_nums_1, convert_quant_op_nums_1 = self.get_convert_op_number(
convert_eval_prog
)
# test convert op numbers
self.assertEqual(convert_op_nums_1 + 25, convert_quant_op_nums_1)
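        # ops created under the 'last_fc' name_scope in MobileNet.net are
        # excluded from quantization below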
config['not_quant_pattern'] = ['last_fc']
quant_prog_2 = quant_aware(
main_prog, place, config=config, for_test=True
)
op_nums_2, quant_op_nums_2 = self.get_op_number(quant_prog_2)
convert_prog_2 = convert(quant_prog_2, place, config=config)
convert_op_nums_2, convert_quant_op_nums_2 = self.get_convert_op_number(
convert_prog_2
)
self.assertEqual(op_nums_1, op_nums_2)
# test skip_quant
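        # the skipped fc loses its two quantize_linear nodes (input + weight)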
self.assertEqual(quant_op_nums_1 - 2, quant_op_nums_2)
# The following assert will fail and is waiting for investigation.
# self.assertEqual(convert_quant_op_nums_1, convert_quant_op_nums_2)
def get_op_number(self, prog):
graph = paddle.fluid.framework.IrGraph(
paddle.framework.core.Graph(prog.desc), for_test=False
)
quant_op_nums = 0
op_nums = 0
for op in graph.all_op_nodes():
if op.name() in ['conv2d', 'depthwise_conv2d', 'mul']:
op_nums += 1
elif op.name() == 'quantize_linear':
quant_op_nums += 1
return op_nums, quant_op_nums
def get_convert_op_number(self, prog):
graph = paddle.fluid.framework.IrGraph(
paddle.framework.core.Graph(prog.desc), for_test=True
)
quant_op_nums = 0
op_nums = 0
for op in graph.all_op_nodes():
if op.name() not in ['quantize_linear', 'dequantize_linear']:
op_nums += 1
elif op.name() == 'quantize_linear':
quant_op_nums += 1
return op_nums, quant_op_nums


if __name__ == '__main__':
    unittest.main()


# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import unittest
import numpy as np
from test_quant_aware import MobileNet
import paddle
from paddle.static.quantization.quanter import convert, quant_aware


class TestQuantAwareBase(unittest.TestCase):
def setUp(self):
paddle.enable_static()
def get_save_int8(self):
return False
def generate_config(self):
config = {
'weight_quantize_type': 'channel_wise_abs_max',
'activation_quantize_type': 'moving_average_abs_max',
'quantize_op_types': ['depthwise_conv2d', 'mul', 'conv2d'],
'onnx_format': False,
}
return config
def test_accuracy(self):
main_prog = paddle.static.Program()
with paddle.static.program_guard(main_prog):
image = paddle.static.data(
name='image', shape=[None, 1, 28, 28], dtype='float32'
)
label = paddle.static.data(
name='label', shape=[None, 1], dtype='int64'
)
model = MobileNet()
out = model.net(input=image, class_dim=10)
cost = paddle.nn.functional.loss.cross_entropy(
input=out, label=label
)
avg_cost = paddle.mean(x=cost)
acc_top1 = paddle.metric.accuracy(input=out, label=label, k=1)
acc_top5 = paddle.metric.accuracy(input=out, label=label, k=5)
optimizer = paddle.optimizer.Momentum(
momentum=0.9,
learning_rate=0.01,
weight_decay=paddle.regularizer.L2Decay(4e-5),
)
optimizer.minimize(avg_cost)
val_prog = main_prog.clone(for_test=True)
place = (
paddle.CUDAPlace(0)
if paddle.is_compiled_with_cuda()
else paddle.CPUPlace()
)
exe = paddle.static.Executor(place)
exe.run(paddle.static.default_startup_program())
def transform(x):
return np.reshape(x, [1, 28, 28])
train_dataset = paddle.vision.datasets.MNIST(
mode='train', backend='cv2', transform=transform
)
test_dataset = paddle.vision.datasets.MNIST(
mode='test', backend='cv2', transform=transform
)
batch_size = 64 if os.environ.get('DATASET') == 'full' else 8
train_loader = paddle.io.DataLoader(
train_dataset,
places=place,
feed_list=[image, label],
drop_last=True,
return_list=False,
batch_size=batch_size,
)
valid_loader = paddle.io.DataLoader(
test_dataset,
places=place,
feed_list=[image, label],
batch_size=batch_size,
return_list=False,
)
def train(program):
iter = 0
stop_iter = None if os.environ.get('DATASET') == 'full' else 10
for data in train_loader():
cost, top1, top5 = exe.run(
program,
feed=data,
fetch_list=[avg_cost, acc_top1, acc_top5],
)
iter += 1
if iter % 100 == 0:
print(
'train iter={}, avg loss {}, acc_top1 {}, acc_top5 {}'.format(
iter, cost, top1, top5
)
)
if stop_iter is not None and iter == stop_iter:
break
def test(program):
iter = 0
stop_iter = None if os.environ.get('DATASET') == 'full' else 10
result = [[], [], []]
for data in valid_loader():
cost, top1, top5 = exe.run(
program,
feed=data,
fetch_list=[avg_cost, acc_top1, acc_top5],
)
iter += 1
if iter % 100 == 0:
print(
'eval iter={}, avg loss {}, acc_top1 {}, acc_top5 {}'.format(
iter, cost, top1, top5
)
)
result[0].append(cost)
result[1].append(top1)
result[2].append(top5)
if stop_iter is not None and iter == stop_iter:
break
print(
' avg loss {}, acc_top1 {}, acc_top5 {}'.format(
np.mean(result[0]), np.mean(result[1]), np.mean(result[2])
)
)
return np.mean(result[1]), np.mean(result[2])
train(main_prog)
top1_1, top5_1 = test(main_prog)
config = self.generate_config()
quant_train_prog = quant_aware(main_prog, place, config, for_test=False)
quant_eval_prog = quant_aware(val_prog, place, config, for_test=True)
train(quant_train_prog)
save_int8 = self.get_save_int8()
if save_int8:
convert_eval_prog, _ = convert(
quant_eval_prog, place, config, save_int8=save_int8
)
else:
convert_eval_prog = convert(
quant_eval_prog, place, config, save_int8=save_int8
)
top1_2, top5_2 = test(convert_eval_prog)
# values before quantization and after quantization should be close
print(f"before quantization: top1: {top1_1}, top5: {top5_1}")
print(f"after quantization: top1: {top1_2}, top5: {top5_2}")


class TestQuantAwareNone(TestQuantAwareBase):
def generate_config(self):
config = None
return config


class TestQuantAwareTRT(TestQuantAwareBase):
def generate_config(self):
config = {
'weight_quantize_type': 'channel_wise_abs_max',
'activation_quantize_type': 'moving_average_abs_max',
'quantize_op_types': ['depthwise_conv2d', 'mul', 'conv2d'],
'onnx_format': False,
'for_tensorrt': True,
}
return config


class TestQuantAwareFullQuantize(TestQuantAwareBase):
def generate_config(self):
config = {
'weight_quantize_type': 'channel_wise_abs_max',
'activation_quantize_type': 'moving_average_abs_max',
'quantize_op_types': ['depthwise_conv2d', 'mul', 'conv2d'],
'onnx_format': False,
'is_full_quantize': True,
}
return config


class TestQuantAwareSaveInt8(TestQuantAwareBase):
def generate_config(self):
config = {
'weight_quantize_type': 'channel_wise_abs_max',
'activation_quantize_type': 'moving_average_abs_max',
'quantize_op_types': ['depthwise_conv2d', 'mul', 'conv2d'],
'onnx_format': False,
}
return config
def get_save_int8(self):
return True


if __name__ == '__main__':
    unittest.main()


# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import unittest
import numpy as np
from test_quant_aware import MobileNet, StaticCase
import paddle
from paddle.static.quantization.quanter import convert, quant_aware


def pact(x):
helper = paddle.fluid.layer_helper.LayerHelper("pact", **locals())
dtype = 'float32'
init_thres = 20
u_param_attr = paddle.ParamAttr(
name=x.name + '_pact',
initializer=paddle.nn.initializer.Constant(value=init_thres),
regularizer=paddle.regularizer.L2Decay(0.0001),
learning_rate=1,
)
u_param = helper.create_parameter(attr=u_param_attr, shape=[1], dtype=dtype)
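    # Clip activations to the learnable range [-u_param, u_param]:
    # relu(x - u) is the excess above u and relu(-u - x) the shortfall
    # below -u, so x - part_a + part_b == clip(x, -u_param, u_param).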
part_a = paddle.nn.functional.relu(x - u_param)
part_b = paddle.nn.functional.relu(-u_param - x)
x = x - part_a + part_b
return x


def get_optimizer():
return paddle.optimizer.Momentum(0.0001, 0.9)


class TestQuantAwareCase1(StaticCase):
def get_model(self):
image = paddle.static.data(
name='image', shape=[None, 1, 28, 28], dtype='float32'
)
label = paddle.static.data(name='label', shape=[None, 1], dtype='int64')
model = MobileNet()
out = model.net(input=image, class_dim=10)
cost = paddle.nn.functional.loss.cross_entropy(input=out, label=label)
avg_cost = paddle.mean(x=cost)
startup_prog = paddle.static.default_startup_program()
train_prog = paddle.static.default_main_program()
return startup_prog, train_prog
def test_accuracy(self):
image = paddle.static.data(
name='image', shape=[None, 1, 28, 28], dtype='float32'
)
image.stop_gradient = False
label = paddle.static.data(name='label', shape=[None, 1], dtype='int64')
model = MobileNet()
out = model.net(input=image, class_dim=10)
cost = paddle.nn.functional.loss.cross_entropy(input=out, label=label)
avg_cost = paddle.mean(x=cost)
acc_top1 = paddle.metric.accuracy(input=out, label=label, k=1)
acc_top5 = paddle.metric.accuracy(input=out, label=label, k=5)
optimizer = paddle.optimizer.Momentum(
momentum=0.9,
learning_rate=0.01,
weight_decay=paddle.regularizer.L2Decay(4e-5),
)
optimizer.minimize(avg_cost)
main_prog = paddle.static.default_main_program()
val_prog = main_prog.clone(for_test=True)
place = (
paddle.CUDAPlace(0)
if paddle.is_compiled_with_cuda()
else paddle.CPUPlace()
)
exe = paddle.static.Executor(place)
exe.run(paddle.static.default_startup_program())
def transform(x):
return np.reshape(x, [1, 28, 28])
train_dataset = paddle.vision.datasets.MNIST(
mode='train', backend='cv2', transform=transform
)
test_dataset = paddle.vision.datasets.MNIST(
mode='test', backend='cv2', transform=transform
)
batch_size = 64 if os.environ.get('DATASET') == 'full' else 8
train_loader = paddle.io.DataLoader(
train_dataset,
places=place,
feed_list=[image, label],
drop_last=True,
return_list=False,
batch_size=batch_size,
)
valid_loader = paddle.io.DataLoader(
test_dataset,
places=place,
feed_list=[image, label],
batch_size=batch_size,
return_list=False,
)
def train(program):
iter = 0
stop_iter = None if os.environ.get('DATASET') == 'full' else 10
for data in train_loader():
cost, top1, top5 = exe.run(
program,
feed=data,
fetch_list=[avg_cost, acc_top1, acc_top5],
)
iter += 1
if iter % 100 == 0:
print(
'train iter={}, avg loss {}, acc_top1 {}, acc_top5 {}'.format(
iter, cost, top1, top5
)
)
if stop_iter is not None and iter == stop_iter:
break
def test(program):
iter = 0
stop_iter = None if os.environ.get('DATASET') == 'full' else 10
result = [[], [], []]
for data in valid_loader():
cost, top1, top5 = exe.run(
program,
feed=data,
fetch_list=[avg_cost, acc_top1, acc_top5],
)
iter += 1
if iter % 100 == 0:
print(
'eval iter={}, avg loss {}, acc_top1 {}, acc_top5 {}'.format(
iter, cost, top1, top5
)
)
result[0].append(cost)
result[1].append(top1)
result[2].append(top5)
if stop_iter is not None and iter == stop_iter:
break
print(
' avg loss {}, acc_top1 {}, acc_top5 {}'.format(
np.mean(result[0]), np.mean(result[1]), np.mean(result[2])
)
)
return np.mean(result[1]), np.mean(result[2])
train(main_prog)
top1_1, top5_1 = test(main_prog)
config = {
'weight_quantize_type': 'channel_wise_abs_max',
'activation_quantize_type': 'moving_average_abs_max',
'quantize_op_types': ['depthwise_conv2d', 'mul', 'conv2d'],
'onnx_format': False,
}
quant_train_prog_pact = quant_aware(
main_prog,
place,
config,
for_test=False,
act_preprocess_func=pact,
optimizer_func=get_optimizer,
executor=exe,
)
quant_eval_prog = quant_aware(val_prog, place, config, for_test=True)
train(quant_train_prog_pact)
quant_eval_prog = convert(quant_eval_prog, place, config)
top1_2, top5_2 = test(quant_eval_prog)
# values before quantization and after quantization should be close
print(f"before quantization: top1: {top1_1}, top5: {top5_1}")
print(f"after quantization: top1: {top1_2}, top5: {top5_2}")


if __name__ == '__main__':
    unittest.main()


# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import random
import unittest
import numpy as np
from test_quant_aware import StaticCase
import paddle
from paddle.static.quantization.quanter import convert, quant_aware
np.random.seed(0)
random.seed(0)
paddle.seed(0)


class RandomDataset(paddle.io.Dataset):
def __init__(self, num_samples):
self.num_samples = num_samples
def __getitem__(self, idx):
enc_input = np.random.random([4, 128]).astype('float32')
attn_mask = np.random.random([2, 4, 4]).astype('float32')
label = np.random.randint(0, 2, (1,)).astype('int64')
return enc_input, attn_mask, label
def __len__(self):
return self.num_samples


class TestQuantPostQuantAwareCase1(StaticCase):
def test_accuracy(self):
def simple_transformer(enc_input, attn_mask):
encoder_layer = paddle.nn.TransformerEncoderLayer(128, 2, 512)
encoder = paddle.nn.TransformerEncoder(encoder_layer, 2)
encoder_output = encoder(enc_input, attn_mask)
first_token = encoder_output[:, 0]
bias = paddle.full(shape=[1, 128], fill_value=1e-6)
linear = paddle.nn.Linear(128, 2)
logits = linear(first_token + bias)
return logits
enc_input = paddle.static.data(
name='enc_input', shape=[None, 4, 128], dtype='float32'
)
attn_mask = paddle.static.data(
name='attn_mask', shape=[None, 2, 4, 4], dtype='float32'
)
label = paddle.static.data(name='label', shape=[None, 1], dtype='int64')
out = simple_transformer(enc_input, attn_mask)
cost = paddle.nn.functional.loss.cross_entropy(input=out, label=label)
avg_cost = paddle.mean(x=cost)
acc_top1 = paddle.metric.accuracy(input=out, label=label, k=1)
optimizer = paddle.optimizer.Momentum(
momentum=0.9,
learning_rate=0.01,
weight_decay=paddle.regularizer.L2Decay(4e-5),
)
optimizer.minimize(avg_cost)
main_prog = paddle.static.default_main_program()
val_prog = main_prog.clone(for_test=True)
place = (
paddle.CUDAPlace(0)
if paddle.is_compiled_with_cuda()
else paddle.CPUPlace()
)
exe = paddle.static.Executor(place)
exe.run(paddle.static.default_startup_program())
train_dataset = RandomDataset(100)
test_dataset = RandomDataset(50)
train_loader = paddle.io.DataLoader(
train_dataset,
places=place,
feed_list=[enc_input, attn_mask, label],
drop_last=True,
return_list=False,
batch_size=10,
)
valid_loader = paddle.io.DataLoader(
test_dataset,
places=place,
feed_list=[enc_input, attn_mask, label],
batch_size=10,
return_list=False,
)
def train(program):
iter = 0
for data in train_loader():
cost, top1 = exe.run(
program, feed=data, fetch_list=[avg_cost, acc_top1]
)
iter += 1
if iter % 100 == 0:
print(
'train iter={}, avg loss {}, acc_top1 {}'.format(
iter, cost, top1
)
)
def test(program):
iter = 0
result = [[], []]
for data in valid_loader():
cost, top1 = exe.run(
program, feed=data, fetch_list=[avg_cost, acc_top1]
)
iter += 1
if iter % 100 == 0:
print(
'eval iter={}, avg loss {}, acc_top1 {}'.format(
iter, cost, top1
)
)
result[0].append(cost)
result[1].append(top1)
print(
' avg loss {}, acc_top1 {}'.format(
np.mean(result[0]), np.mean(result[1])
)
)
return np.mean(result[1])
train(main_prog)
top1_1 = test(main_prog)
config = {
'weight_quantize_type': 'channel_wise_abs_max',
'activation_quantize_type': 'moving_average_abs_max',
'quantize_op_types': [
'conv2d',
'depthwise_conv2d',
'mul',
'matmul',
'elementwise_add',
],
'quant_post_first': True,
'scale_trainable': True,
}
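        # quant_post_first runs post-training quantization first to seed the
        # scales from calib_config; scale_trainable keeps those scales
        # learnable during the subsequent QAT fine-tuning.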
calib_config = {
'data_loader': valid_loader,
'algo': 'abs_max',
'feed_list': ['enc_input', 'attn_mask', 'label'],
'fetch_list': [avg_cost, acc_top1],
}
quant_eval_prog, scale_dict, _, _ = quant_aware(
val_prog,
place,
config,
for_test=True,
calib_config=calib_config,
model_type='transformer',
return_scale_dict=True,
)
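        # reuse the scales calibrated on the eval program when instrumenting
        # the training program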
quant_train_prog = quant_aware(
main_prog,
place,
config,
for_test=False,
calib_config=calib_config,
return_program=True,
scale_dict=scale_dict,
model_type='transformer',
)
train(quant_train_prog)
quant_eval_prog = convert(quant_eval_prog, place, config)
top1_2 = test(quant_eval_prog)
# values before quantization and after quantization should be close
print(f"before quantization: top1: {top1_1}")
print(f"after quantization: top1: {top1_2}")


if __name__ == '__main__':
    unittest.main()