[Paddle Inference-TRT] Add unit tests for six ops with TRT INT8 (#35130)

* add_op_unittest

[Paddle Inference-TRT] Add unit tests for six ops with TRT INT8 (#35130)
* add_op_unittest
39565147 · xiaoxiaohehe001 · GitHub · b94d7ff3 · 39565147 · 39565147
5 changed files
--- a/python/paddle/fluid/tests/unittests/ir/inference/CMakeLists.txt
+++ b/python/paddle/fluid/tests/unittests/ir/inference/CMakeLists.txt
@@ -39,4 +39,7 @@ set_tests_properties(test_trt_pool_op PROPERTIES ENVIRONMENT FLAGS_fraction_of_g
 set_tests_properties(test_trt_reduce_mean_op PROPERTIES TIMEOUT 60)
 set_tests_properties(test_trt_tile_op PROPERTIES TIMEOUT 60)
 set_tests_properties(test_trt_convert_conv2d PROPERTIES TIMEOUT 100)
+set_tests_properties(test_trt_fc_fuse_quant_dequant_pass PROPERTIES TIMEOUT 100)
+set_tests_properties(test_trt_conv_quant_dequant_pass PROPERTIES TIMEOUT 100)
+set_tests_properties(test_trt_matmul_quant_dequant PROPERTIES TIMEOUT 100)
 endif()
--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_conv_quant_dequant_pass.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_conv_quant_dequant_pass.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import shutil
+import unittest
+import numpy as np
+from inference_pass_test import InferencePassTest
+from quant_dequant_test import QuantDequantTest
+import paddle.fluid as fluid
+import paddle.fluid.core as core
+from paddle.fluid.core import PassVersionChecker
+from paddle.fluid.core import AnalysisConfig
+
+
+class QuantDequantTensorRTSubgraphPassConvTest(QuantDequantTest):
+    # INT8 TensorRT subgraph test for conv2d on a quant-dequant network.
+    # The network is reshape -> conv2d -> reshape -> relu with a
+    # cross-entropy loss. Subclasses vary the convolution settings by
+    # overriding set_params().
+    def setUp(self):
+        self.set_params()
+
+        def network():
+            # Builds the graph in whichever program is currently guarded
+            # and returns (mean loss, relu output).
+            self.data = fluid.data(
+                name='data', shape=[1, 28, 28], dtype='float32')
+            # 1 * 28 * 28 == 1 * 4 * 14 * 14: view the input as 4 channels
+            # of 14x14.
+            data_reshape = fluid.layers.reshape(self.data, shape=[1, 4, 14, 14])
+            self.label = fluid.data(name='label', shape=[1, 1], dtype='int64')
+            label_shape = fluid.layers.reshape(self.label, shape=[1, 1, 1])
+            conv_out = fluid.layers.conv2d(
+                input=data_reshape,
+                num_filters=self.conv_num_filters,
+                filter_size=self.conv_filter_size,
+                groups=self.conv_groups,
+                padding=self.conv_padding,
+                bias_attr=False,
+                use_cudnn=self.use_cudnn,
+                act=None)
+            # Flatten the conv output; the hard-coded length depends on the
+            # padding mode (64 filters times the spatial size).
+            if self.conv_padding == [1, 1]:
+                cout = fluid.layers.reshape(conv_out, shape=[1, 1, 10816])
+            elif self.conv_padding == 'VALID':
+                cout = fluid.layers.reshape(conv_out, shape=[1, 1, 7744])
+            elif self.conv_padding == 'SAME':
+                cout = fluid.layers.reshape(conv_out, shape=[1, 1, 12544])
+            elif self.conv_groups == 4:
+                # Not reached by the configurations in this file: the only
+                # groups == 4 case uses padding [1, 1], matched above.
+                cout = fluid.layers.reshape(conv_out, shape=[1, 1, 10816])
+            else:
+                # Previously `cout` stayed unbound here and the relu call
+                # below failed with an opaque UnboundLocalError.
+                raise ValueError(
+                    "unsupported conv configuration: padding=%r, groups=%r" %
+                    (self.conv_padding, self.conv_groups))
+            result = fluid.layers.relu(cout)
+            loss = fluid.layers.cross_entropy(input=result, label=label_shape)
+            avg_loss = fluid.layers.mean(loss)
+            return avg_loss, result
+
+        # Fixed seeds so that program construction is reproducible.
+        self.main_program.random_seed = 2
+        self.startup_program.random_seed = 2
+        self.test_main_program.random_seed = 2
+        #self.test_startup_program.random_seed = 2
+        # Training program: network plus Adam optimizer.
+        with fluid.unique_name.guard():
+            with fluid.program_guard(self.main_program, self.startup_program):
+                self.loss, result = network()
+                opt = fluid.optimizer.Adam(learning_rate=0.0001)
+                opt.minimize(self.loss)
+        # Test program: same network, sharing the startup program; the
+        # return value is discarded, so `result` below is the variable
+        # created while building the training program.
+        with fluid.unique_name.guard():
+            with fluid.program_guard(self.test_main_program,
+                                     self.startup_program):
+                network()
+        self.feeds = {"data": np.random.random([1, 28, 28]).astype("float32")}
+        self.fetch_list = [result]
+        self.enable_trt = True
+        # Positional TensorRTParam arguments; the field names are defined
+        # by the base class and are not visible in this file.
+        self.trt_parameters = QuantDequantTensorRTSubgraphPassConvTest.TensorRTParam(
+            1 << 30, 32, 0, AnalysisConfig.Precision.Int8, False, False)
+        self.activation_quantize_type = 'moving_average_abs_max'
+        self.weight_quantize_type = 'channel_wise_abs_max'
+
+    def set_params(self):
+        # Default configuration: 64 filters of size 4, no grouping,
+        # explicit padding of 1 on both spatial dimensions.
+        self.conv_num_filters = 64
+        self.conv_filter_size = 4
+        self.conv_groups = 1
+        self.conv_padding = [1, 1]
+        self.use_cudnn = True
+
+    def test_check_output(self):
+        # Only exercised on CUDA builds; otherwise the test body is a no-op.
+        if core.is_compiled_with_cuda():
+            use_gpu = True
+            self.check_output_with_option(
+                use_gpu, atol=1e-1, flatten=False, rtol=1e-1)
+            self.assertTrue(
+                PassVersionChecker.IsCompatible('tensorrt_subgraph_pass'))
+
+
+class QuantDequantTensorRTSubgraphPassConvValidPaddingTest(
+        QuantDequantTensorRTSubgraphPassConvTest):
+    # Same conv test, run with 'VALID' padding.
+    def set_params(self):
+        self.conv_num_filters, self.conv_filter_size = 64, 4
+        self.conv_groups, self.conv_padding = 1, 'VALID'
+        self.use_cudnn = True
+
+
+class QuantDequantTensorRTSubgraphPassConvSamePaddingTest(
+        QuantDequantTensorRTSubgraphPassConvTest):
+    # Same conv test, run with 'SAME' padding.
+    def set_params(self):
+        self.conv_num_filters, self.conv_filter_size = 64, 4
+        self.conv_groups, self.conv_padding = 1, 'SAME'
+        self.use_cudnn = True
+
+
+class QuantDequantTensorRTSubgraphPassDWConvTest(
+        QuantDequantTensorRTSubgraphPassConvTest):
+    # Same conv test, run with groups=4 and explicit [1, 1] padding.
+    def set_params(self):
+        self.conv_num_filters, self.conv_filter_size = 64, 4
+        self.conv_groups, self.conv_padding = 4, [1, 1]
+        self.use_cudnn = True
+
+
+class DynamicShapeQuantDequantTensorRTSubgraphPassConvTest(QuantDequantTest):
+    # INT8 TensorRT conv2d test that additionally sets dynamic_shape_params.
+    # The network is reshape -> conv2d -> reshape -> relu with a
+    # cross-entropy loss.
+    def setUp(self):
+        self.set_params()
+
+        def network():
+            # Builds the graph in whichever program is currently guarded
+            # and returns (mean loss, relu output).
+            self.data = fluid.data(
+                name='data', shape=[1, 28, 28], dtype='float32')
+            data_reshape = fluid.layers.reshape(self.data, shape=[1, 4, 14, 14])
+            self.label = fluid.data(name='label', shape=[1, 1], dtype='int64')
+            label_shape = fluid.layers.reshape(self.label, shape=[1, 1, 1])
+            conv_out = fluid.layers.conv2d(
+                input=data_reshape,
+                num_filters=self.conv_num_filters,
+                filter_size=self.conv_filter_size,
+                groups=self.conv_groups,
+                padding=self.conv_padding,
+                bias_attr=False,
+                use_cudnn=self.use_cudnn,
+                act=None)
+            # Flattened length is hard-coded for the set_params() defaults.
+            cout = fluid.layers.reshape(conv_out, shape=[1, 1, 10816])
+            result = fluid.layers.relu(cout)
+            loss = fluid.layers.cross_entropy(input=result, label=label_shape)
+            avg_loss = fluid.layers.mean(loss)
+            return avg_loss, result
+
+        self.main_program.random_seed = 2
+        self.startup_program.random_seed = 2
+        self.test_main_program.random_seed = 2
+        #self.test_startup_program.random_seed = 2
+        # Training program: network plus Adam optimizer.
+        with fluid.unique_name.guard():
+            with fluid.program_guard(self.main_program, self.startup_program):
+                self.loss, result = network()
+                opt = fluid.optimizer.Adam(learning_rate=0.0001)
+                opt.minimize(self.loss)
+        # Test program: same network, sharing the startup program.
+        with fluid.unique_name.guard():
+            with fluid.program_guard(self.test_main_program,
+                                     self.startup_program):
+                network()
+        self.feeds = {"data": np.random.random([1, 28, 28]).astype("float32")}
+        self.fetch_list = [result]
+        self.enable_trt = True
+        self.trt_parameters = DynamicShapeQuantDequantTensorRTSubgraphPassConvTest.TensorRTParam(
+            1 << 30, 32, 0, AnalysisConfig.Precision.Int8, False, False)
+        # Three per-tensor shape dicts followed by a flag. The first and
+        # third dicts are identical; the second uses batch 4 (and a 4x
+        # longer last dim for reshape2_2). Presumably min / max / optimal
+        # shapes in that order -- confirm against DynamicShapeParam.
+        # NOTE(review): the keys look like auto-generated tensor names;
+        # "depthwise_conv2d_0.tmp_0" is listed although set_params() uses
+        # groups=1 -- confirm whether that entry is needed.
+        self.dynamic_shape_params = DynamicShapeQuantDequantTensorRTSubgraphPassConvTest.DynamicShapeParam(
+            {
+                "conv2d_0.tmp_0": [1, 4, 14, 14],
+                "data": [1, 28, 28],
+                "depthwise_conv2d_0.tmp_0": [1, 4, 14, 14],
+                "reshape2_0.tmp_0": [1, 4, 14, 14],
+                "reshape2_2.tmp_0": [1, 1, 10816]
+            }, {
+                "conv2d_0.tmp_0": [4, 4, 14, 14],
+                "data": [4, 28, 28],
+                "depthwise_conv2d_0.tmp_0": [4, 4, 14, 14],
+                "reshape2_0.tmp_0": [4, 4, 14, 14],
+                "reshape2_2.tmp_0": [1, 1, 43264]
+            }, {
+                "conv2d_0.tmp_0": [1, 4, 14, 14],
+                "data": [1, 28, 28],
+                "depthwise_conv2d_0.tmp_0": [1, 4, 14, 14],
+                "reshape2_0.tmp_0": [1, 4, 14, 14],
+                "reshape2_2.tmp_0": [1, 1, 10816]
+            }, False)
+        self.activation_quantize_type = 'moving_average_abs_max'
+        self.weight_quantize_type = 'channel_wise_abs_max'
+
+    def set_params(self):
+        # 64 filters of size 4, no grouping, padding of 1 on both dims.
+        self.conv_num_filters = 64
+        self.conv_filter_size = 4
+        self.conv_groups = 1
+        self.conv_padding = [1, 1]
+        self.use_cudnn = True
+
+    def test_check_output(self):
+        # Only exercised on CUDA builds; otherwise the test body is a no-op.
+        if core.is_compiled_with_cuda():
+            use_gpu = True
+            self.check_output_with_option(
+                use_gpu, atol=1e-1, flatten=False, rtol=1e-1)
+            self.assertTrue(
+                PassVersionChecker.IsCompatible('tensorrt_subgraph_pass'))
+
+
+class QuantDequantTensorRTSubgraphPassConvTransposeTest(QuantDequantTest):
+    # INT8 TensorRT subgraph test for conv2d_transpose on a quant-dequant
+    # network: reshape -> conv2d_transpose -> reshape -> relu with a
+    # cross-entropy loss. Subclasses vary the settings via set_params().
+    def setUp(self):
+        self.set_params()
+
+        def network():
+            # Builds the graph in whichever program is currently guarded
+            # and returns (mean loss, relu output).
+            self.data = fluid.data(
+                name='data', shape=[1, 28, 28], dtype='float32')
+            data_reshape = fluid.layers.reshape(self.data, shape=[1, 4, 14, 14])
+            self.label = fluid.data(name='label', shape=[1, 1], dtype='int64')
+            label_shape = fluid.layers.reshape(self.label, shape=[1, 1, 1])
+            conv_out = fluid.layers.conv2d_transpose(
+                input=data_reshape,
+                num_filters=self.conv_num_filters,
+                filter_size=self.conv_filter_size,
+                groups=self.conv_groups,
+                padding=self.conv_padding,
+                bias_attr=False,
+                use_cudnn=self.use_cudnn,
+                act=None)
+            # Flatten the output; the hard-coded length depends on the
+            # padding mode.
+            if self.conv_padding == [1, 1]:
+                cout = fluid.layers.reshape(conv_out, shape=[1, 1, 14400])
+            elif self.conv_padding == 'VALID':
+                cout = fluid.layers.reshape(conv_out, shape=[1, 1, 18496])
+            elif self.conv_padding == 'SAME':
+                cout = fluid.layers.reshape(conv_out, shape=[1, 1, 12544])
+            elif self.conv_groups == 4:
+                # NOTE(review): not reached by any configuration in this
+                # file (the only groups == 4 case uses padding [1, 1],
+                # matched above). 10816 differs from the 14400 used for
+                # [1, 1] padding above -- confirm the intended size.
+                cout = fluid.layers.reshape(conv_out, shape=[1, 1, 10816])
+            else:
+                # Previously `cout` stayed unbound here and the relu call
+                # below failed with an opaque UnboundLocalError.
+                raise ValueError(
+                    "unsupported conv_transpose configuration: "
+                    "padding=%r, groups=%r" %
+                    (self.conv_padding, self.conv_groups))
+            result = fluid.layers.relu(cout)
+            loss = fluid.layers.cross_entropy(input=result, label=label_shape)
+            avg_loss = fluid.layers.mean(loss)
+            return avg_loss, result
+
+        # Fixed seeds so that program construction is reproducible.
+        self.main_program.random_seed = 2
+        self.startup_program.random_seed = 2
+        self.test_main_program.random_seed = 2
+        #self.test_startup_program.random_seed = 2
+        # Training program: network plus Adam optimizer.
+        with fluid.unique_name.guard():
+            with fluid.program_guard(self.main_program, self.startup_program):
+                self.loss, result = network()
+                opt = fluid.optimizer.Adam(learning_rate=0.0001)
+                opt.minimize(self.loss)
+        # Test program: same network, sharing the startup program; its
+        # return value is discarded.
+        with fluid.unique_name.guard():
+            with fluid.program_guard(self.test_main_program,
+                                     self.startup_program):
+                network()
+        self.feeds = {"data": np.random.random([1, 28, 28]).astype("float32")}
+        self.fetch_list = [result]
+        self.enable_trt = True
+        # Positional TensorRTParam arguments; the field names are defined
+        # by the base class and are not visible in this file.
+        self.trt_parameters = QuantDequantTensorRTSubgraphPassConvTransposeTest.TensorRTParam(
+            1 << 30, 32, 0, AnalysisConfig.Precision.Int8, False, False)
+        self.activation_quantize_type = 'moving_average_abs_max'
+        self.weight_quantize_type = 'channel_wise_abs_max'
+
+    def set_params(self):
+        # Default configuration: 64 filters of size 4, no grouping,
+        # explicit padding of 1 on both spatial dimensions.
+        self.conv_num_filters = 64
+        self.conv_filter_size = 4
+        self.conv_groups = 1
+        self.conv_padding = [1, 1]
+        self.use_cudnn = True
+
+    def test_check_output(self):
+        # Only exercised on CUDA builds; otherwise the test body is a no-op.
+        if core.is_compiled_with_cuda():
+            use_gpu = True
+            self.check_output_with_option(
+                use_gpu, atol=1e-1, flatten=False, rtol=1e-1)
+            self.assertTrue(
+                PassVersionChecker.IsCompatible('tensorrt_subgraph_pass'))
+
+
+class QuantDequantTensorRTSubgraphPassConvTransValidPaddingTest(
+        QuantDequantTensorRTSubgraphPassConvTransposeTest):
+    # Same conv2d_transpose test, run with 'VALID' padding.
+    def set_params(self):
+        self.conv_num_filters, self.conv_filter_size = 64, 4
+        self.conv_groups, self.conv_padding = 1, 'VALID'
+        self.use_cudnn = True
+
+
+class QuantDequantTensorRTSubgraphPassConvTransSamePaddingTest(
+        QuantDequantTensorRTSubgraphPassConvTransposeTest):
+    # Same conv2d_transpose test, run with 'SAME' padding.
+    def set_params(self):
+        self.conv_num_filters, self.conv_filter_size = 64, 4
+        self.conv_groups, self.conv_padding = 1, 'SAME'
+        self.use_cudnn = True
+
+
+class QuantDequantTensorRTSubgraphPassTransDWConvTest(
+        QuantDequantTensorRTSubgraphPassConvTransposeTest):
+    # Same conv2d_transpose test, run with groups=4 and [1, 1] padding.
+    def set_params(self):
+        self.conv_num_filters, self.conv_filter_size = 64, 4
+        self.conv_groups, self.conv_padding = 4, [1, 1]
+        self.use_cudnn = True
+
+
+# Run the test cases in this module when it is executed as a script.
+if __name__ == "__main__":
+    unittest.main()
--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_fc_fuse_pass.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_fc_fuse_pass.py
@@ -285,55 +285,5 @@ class FCFusePassTRTDynamicDims4Cols3Test(InferencePassTest):
            self.check_output_with_option(use_gpu[i])


-class FcQuantDequantFusePassTRTTest(QuantDequantTest):
-    def setUp(self):
-        def network():
-            self.data = fluid.data(
-                name='data', shape=[1, 28, 28], dtype='float32')
-            self.label = fluid.data(name='label', shape=[1, 1], dtype='int64')
-            fc_out = fluid.layers.fc(input=self.data,
-                                     size=10,
-                                     num_flatten_dims=1,
-                                     bias_attr=False,
-                                     act=None)
-            result = fluid.layers.relu(fc_out)
-            loss = fluid.layers.cross_entropy(input=result, label=self.label)
-            avg_loss = fluid.layers.mean(loss)
-            return avg_loss, result
-
-        self.main_program.random_seed = 2
-        self.startup_program.random_seed = 2
-        self.test_main_program.random_seed = 2
-        #self.test_startup_program.random_seed = 2
-        with fluid.unique_name.guard():
-            with fluid.program_guard(self.main_program, self.startup_program):
-                self.loss, result = network()
-                opt = fluid.optimizer.Adam(learning_rate=0.0001)
-                opt.minimize(self.loss)
-        with fluid.unique_name.guard():
-            with fluid.program_guard(self.test_main_program,
-                                     self.startup_program):
-                network()
-        self.feeds = {"data": np.random.random((1, 28, 28)).astype("float32")}
-        self.fetch_list = [result]
-
-        self.enable_trt = True
-
-        self.trt_parameters = FcQuantDequantFusePassTRTTest.TensorRTParam(
-            1 << 30, 32, 0, AnalysisConfig.Precision.Int8, False, False)
-        self.activation_quantize_type = 'moving_average_abs_max'
-        self.weight_quantize_type = 'channel_wise_abs_max'
-
-    def test_check_output(self):
-        #self.quant_dequant()
-        if core.is_compiled_with_cuda():
-            use_gpu = True
-            self.check_output_with_option(
-                use_gpu, atol=1e-2, flatten=False, rtol=1e-2)
-            self.assertTrue(
-                PassVersionChecker.IsCompatible(
-                    'quant_conv2d_dequant_fuse_pass'))
-
-
 if __name__ == "__main__":
    unittest.main()
--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_fc_fuse_quant_dequant_pass.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_fc_fuse_quant_dequant_pass.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import unittest
+import numpy as np
+from inference_pass_test import InferencePassTest
+from quant_dequant_test import QuantDequantTest
+import paddle.fluid as fluid
+import paddle.fluid.core as core
+from paddle.fluid.core import AnalysisConfig
+from paddle.fluid.core import PassVersionChecker
+
+
+class FCQuantDequantFusePassTRTDims3Cols1Test(QuantDequantTest):
+    # INT8 TensorRT test for an fc layer applied to a 3-D input with
+    # num_flatten_dims=1, followed by relu and a cross-entropy loss.
+    def setUp(self):
+        def network():
+            # Builds the graph in whichever program is currently guarded
+            # and returns (mean loss, relu output).
+            self.data = fluid.data(
+                name='data', shape=[1, 28, 28], dtype='float32')
+            self.label = fluid.data(name='label', shape=[1, 1], dtype='int64')
+            # NOTE(review): relu is applied twice here -- once through
+            # act="relu" on the fc layer and once by the explicit relu
+            # below. Confirm this is intended.
+            fc_out = fluid.layers.fc(input=self.data,
+                                     size=10,
+                                     num_flatten_dims=1,
+                                     bias_attr=False,
+                                     act="relu")
+            result = fluid.layers.relu(fc_out)
+            loss = fluid.layers.cross_entropy(input=result, label=self.label)
+            avg_loss = fluid.layers.mean(loss)
+            return avg_loss, result
+
+        self.main_program.random_seed = 2
+        self.startup_program.random_seed = 2
+        self.test_main_program.random_seed = 2
+        #self.test_startup_program.random_seed = 2
+        # Training program: network plus Adam optimizer.
+        with fluid.unique_name.guard():
+            with fluid.program_guard(self.main_program, self.startup_program):
+                self.loss, result = network()
+                opt = fluid.optimizer.Adam(learning_rate=0.0001)
+                opt.minimize(self.loss)
+        # Test program: same network, sharing the startup program.
+        with fluid.unique_name.guard():
+            with fluid.program_guard(self.test_main_program,
+                                     self.startup_program):
+                network()
+        self.feeds = {"data": np.random.random((1, 28, 28)).astype("float32")}
+        self.fetch_list = [result]
+        self.enable_trt = True
+        self.trt_parameters = FCQuantDequantFusePassTRTDims3Cols1Test.TensorRTParam(
+            1 << 30, 32, 0, AnalysisConfig.Precision.Int8, False, False)
+        # Three per-tensor shape dicts followed by a flag; the first and
+        # third are identical, the second uses batch 2. Presumably
+        # min / max / optimal shapes -- confirm against DynamicShapeParam.
+        # NOTE(review): network() above creates no reshape layer, yet a
+        # 'reshape2_1.tmp_0' entry is listed -- confirm it is needed.
+        self.dynamic_shape_params = FCQuantDequantFusePassTRTDims3Cols1Test.DynamicShapeParam(
+            {
+                'data': [1, 28, 28],
+                'reshape2_1.tmp_0': [1, 1, 10]
+            }, {'data': [2, 28, 28],
+                'reshape2_1.tmp_0': [2, 1, 10]},
+            {'data': [1, 28, 28],
+             'reshape2_1.tmp_0': [1, 1, 10]}, False)
+        self.activation_quantize_type = 'moving_average_abs_max'
+        self.weight_quantize_type = 'channel_wise_abs_max'
+
+    def test_check_output(self):
+        # Only exercised on CUDA builds; otherwise the test body is a no-op.
+        #self.quant_dequant()
+        if core.is_compiled_with_cuda():
+            use_gpu = True
+            self.check_output_with_option(
+                use_gpu, atol=1e-2, flatten=False, rtol=1e-2)
+            self.assertTrue(
+                PassVersionChecker.IsCompatible(
+                    'quant_conv2d_dequant_fuse_pass'))
+
+
+class FCQuantDequantFusePassTRTDims3Cols2Test(QuantDequantTest):
+    # INT8 TensorRT test for an fc layer applied to a 3-D input with
+    # num_flatten_dims=2; the fc output is flattened before relu.
+    def setUp(self):
+        def network():
+            # Builds the graph in whichever program is currently guarded
+            # and returns (mean loss, relu output).
+            self.data = fluid.data(
+                name='data', shape=[1, 28, 28], dtype='float32')
+            self.label = fluid.data(name='label', shape=[1, 1], dtype='int64')
+            label_shape = fluid.layers.reshape(self.label, shape=[1, 1, 1])
+            fc_out = fluid.layers.fc(input=self.data,
+                                     size=28,
+                                     num_flatten_dims=2,
+                                     bias_attr=False,
+                                     act=None)
+            # 784 == 28 * 28.
+            c_out = fluid.layers.reshape(fc_out, shape=[1, 1, 784])
+            result = fluid.layers.relu(c_out)
+            loss = fluid.layers.cross_entropy(input=result, label=label_shape)
+            avg_loss = fluid.layers.mean(loss)
+            return avg_loss, result
+
+        self.main_program.random_seed = 2
+        self.startup_program.random_seed = 2
+        self.test_main_program.random_seed = 2
+        #self.test_startup_program.random_seed = 2
+        # Training program: network plus Adam optimizer.
+        with fluid.unique_name.guard():
+            with fluid.program_guard(self.main_program, self.startup_program):
+                self.loss, result = network()
+                opt = fluid.optimizer.Adam(learning_rate=0.0001)
+                opt.minimize(self.loss)
+        # Test program: same network, sharing the startup program.
+        with fluid.unique_name.guard():
+            with fluid.program_guard(self.test_main_program,
+                                     self.startup_program):
+                network()
+        self.feeds = {"data": np.random.random((1, 28, 28)).astype("float32")}
+        self.fetch_list = [result]
+        self.enable_trt = True
+        self.trt_parameters = FCQuantDequantFusePassTRTDims3Cols2Test.TensorRTParam(
+            1 << 30, 32, 0, AnalysisConfig.Precision.Int8, False, False)
+        # Three per-tensor shape dicts followed by a flag; the first and
+        # third are identical, the second uses batch 4. Presumably
+        # min / max / optimal shapes -- confirm against DynamicShapeParam.
+        self.dynamic_shape_params = FCQuantDequantFusePassTRTDims3Cols2Test.DynamicShapeParam(
+            {
+                'data': [1, 28, 28],
+                'reshape2_1.tmp_0': [1, 1, 784]
+            }, {'data': [4, 28, 28],
+                'reshape2_1.tmp_0': [4, 1, 784]},
+            {'data': [1, 28, 28],
+             'reshape2_1.tmp_0': [1, 1, 784]}, False)
+        self.activation_quantize_type = 'moving_average_abs_max'
+        self.weight_quantize_type = 'channel_wise_abs_max'
+
+    def test_check_output(self):
+        # Only exercised on CUDA builds; otherwise the test body is a no-op.
+        #self.quant_dequant()
+        if core.is_compiled_with_cuda():
+            use_gpu = True
+            self.check_output_with_option(
+                use_gpu, atol=1e-1, flatten=False, rtol=1e-1)
+            self.assertTrue(
+                PassVersionChecker.IsCompatible(
+                    'quant_conv2d_dequant_fuse_pass'))
+
+
+class FCQuantDequantFusePassTRTDims3Cols3Test(QuantDequantTest):
+    # INT8 TensorRT test for an fc layer applied to a 4-D (reshaped)
+    # input with num_flatten_dims=3; the fc output is flattened before
+    # relu.
+    def setUp(self):
+        def network():
+            # Builds the graph in whichever program is currently guarded
+            # and returns (mean loss, relu output).
+            self.data = fluid.data(
+                name='data', shape=[1, 28, 28], dtype='float32')
+            self.label = fluid.data(name='label', shape=[1, 1], dtype='int64')
+            label_shape = fluid.layers.reshape(self.label, shape=[1, 1, 1])
+            # 1 * 28 * 28 == 1 * 14 * 14 * 4.
+            reshape_out = fluid.layers.reshape(self.data, shape=[1, 14, 14, 4])
+            fc_out = fluid.layers.fc(input=reshape_out,
+                                     size=14,
+                                     num_flatten_dims=3,
+                                     bias_attr=False,
+                                     act=None)
+            # 2744 == 14 * 14 * 14.
+            c_out = fluid.layers.reshape(fc_out, shape=[1, 1, 2744])
+            result = fluid.layers.relu(c_out)
+            loss = fluid.layers.cross_entropy(input=result, label=label_shape)
+            avg_loss = fluid.layers.mean(loss)
+            return avg_loss, result
+
+        self.main_program.random_seed = 2
+        self.startup_program.random_seed = 2
+        self.test_main_program.random_seed = 2
+        #self.test_startup_program.random_seed = 2
+        # Training program: network plus Adam optimizer.
+        with fluid.unique_name.guard():
+            with fluid.program_guard(self.main_program, self.startup_program):
+                self.loss, result = network()
+                opt = fluid.optimizer.Adam(learning_rate=0.0001)
+                opt.minimize(self.loss)
+        # Test program: same network, sharing the startup program.
+        with fluid.unique_name.guard():
+            with fluid.program_guard(self.test_main_program,
+                                     self.startup_program):
+                network()
+        self.feeds = {"data": np.random.random((1, 28, 28)).astype("float32")}
+        self.fetch_list = [result]
+        self.enable_trt = True
+        self.trt_parameters = FCQuantDequantFusePassTRTDims3Cols3Test.TensorRTParam(
+            1 << 30, 32, 0, AnalysisConfig.Precision.Int8, False, False)
+        # Three per-tensor shape dicts followed by a flag; the first and
+        # third are identical, the second uses batch 4. Presumably
+        # min / max / optimal shapes -- confirm against DynamicShapeParam.
+        self.dynamic_shape_params = FCQuantDequantFusePassTRTDims3Cols3Test.DynamicShapeParam(
+            {
+                'data': [1, 28, 28],
+                "reshape2_1.tmp_0": [1, 14, 14, 4],
+                "reshape2_2.tmp_0": [1, 1, 2744]
+            }, {
+                'data': [4, 28, 28],
+                "reshape2_1.tmp_0": [4, 14, 14, 4],
+                "reshape2_2.tmp_0": [4, 1, 2744]
+            }, {
+                'data': [1, 28, 28],
+                "reshape2_1.tmp_0": [1, 14, 14, 4],
+                "reshape2_2.tmp_0": [1, 1, 2744]
+            }, False)
+        self.activation_quantize_type = 'moving_average_abs_max'
+        self.weight_quantize_type = 'channel_wise_abs_max'
+
+    def test_check_output(self):
+        # Only exercised on CUDA builds; otherwise the test body is a no-op.
+        #self.quant_dequant()
+        if core.is_compiled_with_cuda():
+            use_gpu = True
+            # NOTE(review): atol/rtol of 1e0 is far looser than the 1e-1
+            # and 1e-2 used by the other FC tests in this file -- confirm
+            # the comparison is still meaningful.
+            self.check_output_with_option(
+                use_gpu, atol=1e0, flatten=False, rtol=1e0)
+            self.assertTrue(
+                PassVersionChecker.IsCompatible(
+                    'quant_conv2d_dequant_fuse_pass'))
+
+
+# Run the test cases in this module when it is executed as a script.
+if __name__ == "__main__":
+    unittest.main()
--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_matmul_quant_dequant.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_matmul_quant_dequant.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+import numpy as np
+from inference_pass_test import InferencePassTest
+from quant_dequant_test import QuantDequantTest
+import paddle.fluid as fluid
+import paddle.fluid.core as core
+from paddle.fluid.core import PassVersionChecker
+from paddle.fluid.core import AnalysisConfig
+
+
+class TensorRTMatMulQuantDequantDims3Test(QuantDequantTest):
+    # INT8 TensorRT test for matmul on a 3-D input multiplied with itself,
+    # followed by fc -> relu and a cross-entropy loss. Subclasses vary the
+    # transpose flags and alpha by overriding set_params().
+    def setUp(self):
+        self.set_params()
+
+        def network():
+            # Builds the graph in whichever program is currently guarded
+            # and returns (mean loss, relu output).
+            self.data = fluid.data(
+                name='data', shape=[1, 28, 28], dtype='float32')
+            self.label = fluid.data(name='label', shape=[1, 1], dtype='int64')
+            # Both matmul operands are the same input tensor.
+            matmul_out = fluid.layers.matmul(
+                x=self.data,
+                y=self.data,
+                transpose_x=self.transpose_x,
+                transpose_y=self.transpose_y,
+                alpha=self.alpha)
+            fc_out = fluid.layers.fc(input=matmul_out,
+                                     size=10,
+                                     num_flatten_dims=1,
+                                     bias_attr=False,
+                                     act=None)
+            result = fluid.layers.relu(fc_out)
+            loss = fluid.layers.cross_entropy(input=result, label=self.label)
+            avg_loss = fluid.layers.mean(loss)
+            return avg_loss, result
+
+        self.main_program.random_seed = 2
+        self.startup_program.random_seed = 2
+        self.test_main_program.random_seed = 2
+        #self.test_startup_program.random_seed = 2
+        # Training program: network plus Adam optimizer.
+        with fluid.unique_name.guard():
+            with fluid.program_guard(self.main_program, self.startup_program):
+                self.loss, result = network()
+                opt = fluid.optimizer.Adam(learning_rate=0.0001)
+                opt.minimize(self.loss)
+        # Test program: same network, sharing the startup program.
+        with fluid.unique_name.guard():
+            with fluid.program_guard(self.test_main_program,
+                                     self.startup_program):
+                network()
+        self.feeds = {"data": np.random.random([1, 28, 28]).astype("float32")}
+        self.fetch_list = [result]
+        self.enable_trt = True
+        self.trt_parameters = TensorRTMatMulQuantDequantDims3Test.TensorRTParam(
+            1 << 30, 32, 0, AnalysisConfig.Precision.Int8, False, False)
+        self.activation_quantize_type = 'moving_average_abs_max'
+        self.weight_quantize_type = 'channel_wise_abs_max'
+
+    def set_params(self):
+        # Default configuration: no transposes, unit scale.
+        self.transpose_x = False
+        self.transpose_y = False
+        self.alpha = 1.0
+
+    def test_check_output(self):
+        # Only exercised on CUDA builds; otherwise the test body is a no-op.
+        #self.quant_dequant()
+        if core.is_compiled_with_cuda():
+            use_gpu = True
+            self.check_output_with_option(
+                use_gpu, atol=1e-1, flatten=False, rtol=1e-1)
+            self.assertTrue(
+                PassVersionChecker.IsCompatible('tensorrt_subgraph_pass'))
+
+
+class TensorRTMatMulQuantDequantDims3TransposeXTest(
+        TensorRTMatMulQuantDequantDims3Test):
+    # 3-D matmul test with only the first operand transposed.
+    def set_params(self):
+        self.transpose_x, self.transpose_y = True, False
+        self.alpha = 1.0
+
+
+class TensorRTMatMulQuantDequantDims3TransposeYTest(
+        TensorRTMatMulQuantDequantDims3Test):
+    # 3-D matmul test with only the second operand transposed.
+    def set_params(self):
+        self.transpose_x, self.transpose_y = False, True
+        self.alpha = 1.0
+
+
+class TensorRTMatMulQuantDequantDims3TransposeXYTest(
+        TensorRTMatMulQuantDequantDims3Test):
+    # 3-D matmul test with both operands transposed.
+    def set_params(self):
+        self.transpose_x, self.transpose_y = True, True
+        self.alpha = 1.0
+
+
+class TensorRTMatMulQuantDequantDims4Test(QuantDequantTest):
+    # INT8 TensorRT test for matmul on a 4-D (reshaped) input multiplied
+    # with itself, followed by fc -> relu and a cross-entropy loss.
+    # Subclasses vary the transpose flags and alpha via set_params().
+    def setUp(self):
+        self.set_params()
+
+        def network():
+            # Builds the graph in whichever program is currently guarded
+            # and returns (mean loss, relu output).
+            self.data = fluid.data(
+                name='data', shape=[1, 28, 28], dtype='float32')
+            self.label = fluid.data(name='label', shape=[1, 1], dtype='int64')
+            # 1 * 28 * 28 == 1 * 4 * 14 * 14.
+            reshape_out = fluid.layers.reshape(self.data, shape=[1, 4, 14, 14])
+            # Both matmul operands are the same reshaped tensor.
+            matmul_out = fluid.layers.matmul(
+                x=reshape_out,
+                y=reshape_out,
+                transpose_x=self.transpose_x,
+                transpose_y=self.transpose_y,
+                alpha=self.alpha)
+            # NOTE(review): `out` is never used -- the fc layer below reads
+            # `matmul_out`, so the batch_norm result feeds nothing in this
+            # function. Confirm whether fc was meant to consume `out`.
+            out = fluid.layers.batch_norm(matmul_out, is_test=True)
+            fc_out = fluid.layers.fc(input=matmul_out,
+                                     size=10,
+                                     num_flatten_dims=1,
+                                     bias_attr=False,
+                                     act=None)
+            result = fluid.layers.relu(fc_out)
+            loss = fluid.layers.cross_entropy(input=result, label=self.label)
+            avg_loss = fluid.layers.mean(loss)
+            return avg_loss, result
+
+        self.main_program.random_seed = 2
+        self.startup_program.random_seed = 2
+        self.test_main_program.random_seed = 2
+        #self.test_startup_program.random_seed = 2
+        # Training program: network plus Adam optimizer.
+        with fluid.unique_name.guard():
+            with fluid.program_guard(self.main_program, self.startup_program):
+                self.loss, result = network()
+                opt = fluid.optimizer.Adam(learning_rate=0.0001)
+                opt.minimize(self.loss)
+        # Test program: same network, sharing the startup program.
+        with fluid.unique_name.guard():
+            with fluid.program_guard(self.test_main_program,
+                                     self.startup_program):
+                network()
+        self.feeds = {"data": np.random.random([1, 28, 28]).astype("float32")}
+        self.fetch_list = [result]
+        self.enable_trt = True
+        self.trt_parameters = TensorRTMatMulQuantDequantDims4Test.TensorRTParam(
+            1 << 30, 32, 0, AnalysisConfig.Precision.Int8, False, False)
+        self.activation_quantize_type = 'moving_average_abs_max'
+        self.weight_quantize_type = 'channel_wise_abs_max'
+
+    def set_params(self):
+        # Default configuration: no transposes, unit scale.
+        self.transpose_x = False
+        self.transpose_y = False
+        self.alpha = 1.0
+
+    def test_check_output(self):
+        # Only exercised on CUDA builds; otherwise the test body is a no-op.
+        #self.quant_dequant()
+        if core.is_compiled_with_cuda():
+            use_gpu = True
+            self.check_output_with_option(
+                use_gpu, atol=1e-1, flatten=False, rtol=1e-1)
+            self.assertTrue(
+                PassVersionChecker.IsCompatible('tensorrt_subgraph_pass'))
+
+
+class TensorRTMatMulQuantDequantDims4TransposeXTest(
+        TensorRTMatMulQuantDequantDims4Test):
+    # 4-D matmul test with only the first operand transposed.
+    def set_params(self):
+        self.transpose_x, self.transpose_y = True, False
+        self.alpha = 1.0
+
+
+class TensorRTMatMulQuantDequantDims4TransposeYTest(
+        TensorRTMatMulQuantDequantDims4Test):
+    # 4-D matmul test with only the second operand transposed.
+    def set_params(self):
+        self.transpose_x, self.transpose_y = False, True
+        self.alpha = 1.0
+
+
+class TensorRTMatMulQuantDequantDims4TransposeXYTest(
+        TensorRTMatMulQuantDequantDims4Test):
+    # 4-D matmul test with both operands transposed.
+    def set_params(self):
+        self.transpose_x, self.transpose_y = True, True
+        self.alpha = 1.0
+
+
+class TensorRTMatMulQuantDequantDims4ScaleTest(
+        TensorRTMatMulQuantDequantDims4Test):
+    # 4-D matmul test without transposes and with alpha set to 2.0.
+    def set_params(self):
+        self.transpose_x, self.transpose_y = False, False
+        self.alpha = 2.0
+
+
+# Run the test cases in this module when it is executed as a script.
+if __name__ == "__main__":
+    unittest.main()