diff --git a/python/paddle/fluid/tests/unittests/ir/CMakeLists.txt b/python/paddle/fluid/tests/unittests/ir/CMakeLists.txt
index 369a2f8b91ad738865af0b34d1239f5e98636105..7d81a744be1f05423edd29a00d5495f642c18cc5 100644
--- a/python/paddle/fluid/tests/unittests/ir/CMakeLists.txt
+++ b/python/paddle/fluid/tests/unittests/ir/CMakeLists.txt
@@ -4,3 +4,5 @@ string(REPLACE ".py" "" TEST_IR_PASSES "${TEST_IR_PASSES}")
 foreach(target ${TEST_IR_PASSES})
   py_test_modules(${target} MODULES ${target})
 endforeach()
+
+add_subdirectory(inference)
diff --git a/python/paddle/fluid/tests/unittests/ir/inference/CMakeLists.txt b/python/paddle/fluid/tests/unittests/ir/inference/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e958ba75638fc28b1ad816371ed3bc529957f944
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/ir/inference/CMakeLists.txt
@@ -0,0 +1,30 @@
+file(GLOB TEST_INFERENCE_IR_PASSES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py")
+string(REPLACE ".py" "" TEST_INFERENCE_IR_PASSES "${TEST_INFERENCE_IR_PASSES}")
+
+file(GLOB TEST_TRT_IR_PASSES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_trt_*.py")
+string(REPLACE ".py" "" TEST_TRT_IR_PASSES "${TEST_TRT_IR_PASSES}")
+foreach(TEST_INFERENCE_IR_PASS ${TEST_TRT_IR_PASSES})
+  list(REMOVE_ITEM TEST_INFERENCE_IR_PASSES ${TEST_INFERENCE_IR_PASS})
+endforeach()
+
+if(WITH_GPU AND TENSORRT_FOUND)
+  foreach(target ${TEST_TRT_IR_PASSES})
+    py_test_modules(${target} MODULES ${target})
+  endforeach()
+endif()
+
+file(GLOB TEST_MKLDNN_IR_PASSES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_mkldnn_*.py")
+string(REPLACE ".py" "" TEST_MKLDNN_IR_PASSES "${TEST_MKLDNN_IR_PASSES}")
+foreach(TEST_INFERENCE_IR_PASS ${TEST_MKLDNN_IR_PASSES})
+  list(REMOVE_ITEM TEST_INFERENCE_IR_PASSES ${TEST_INFERENCE_IR_PASS})
+endforeach()
+
+if(WITH_MKLDNN)
+  foreach(target ${TEST_MKLDNN_IR_PASSES})
+    py_test_modules(${target} MODULES ${target})
+  endforeach()
+endif()
+
+foreach(target ${TEST_INFERENCE_IR_PASSES})
+  py_test_modules(${target} MODULES ${target})
+endforeach()
diff --git a/python/paddle/fluid/tests/unittests/ir/inference/inference_pass_test.py b/python/paddle/fluid/tests/unittests/ir/inference/inference_pass_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..95b812bdf7ca48562b66f50e55ae443780857c72
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/ir/inference/inference_pass_test.py
@@ -0,0 +1,212 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import os
+import six
+import random
+import unittest
+import numpy as np
+
+import paddle.fluid as fluid
+import paddle.fluid.core as core
+from paddle.fluid.core import PaddleTensor
+from paddle.fluid.core import PaddleDType
+from paddle.fluid.core import AnalysisConfig
+from paddle.fluid.core import create_paddle_predictor
+
+
+class InferencePassTest(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):
+        cls.main_program = fluid.Program()
+        cls.startup_program = fluid.Program()
+        cls.feeds = None
+        cls.fetch_list = None
+
+        cls.enable_mkldnn = False
+        cls.enable_trt = False
+        cls.trt_parameters = None
+        cls.enable_lite = False
+        cls.lite_parameters = None
+        cls.path = "./inference_pass/"
+        np.random.seed(1)
+        random.seed(1)
+
+    def _get_place(self):
+        return set([False, core.is_compiled_with_cuda()])
+
+    def _save_models(self, executor, program):
+        outs = executor.run(program=program,
+                            feed=self.feeds,
+                            fetch_list=self.fetch_list,
+                            return_numpy=False)
+        fluid.io.save_inference_model(
+            dirname=self.path,
+            feeded_var_names=list(self.feeds.keys()),
+            target_vars=self.fetch_list,
+            executor=executor,
+            main_program=program,
+            model_filename="model",
+            params_filename="params")
+
+        return outs
+
+    def _get_analysis_outputs(self, config):
+        '''
+        Return AnalysisPredictor outputs.
+        '''
+        predictor = create_paddle_predictor(config)
+        tensor_shapes = predictor.get_input_tensor_shape()
+        names = predictor.get_input_names()
+        for i, name in enumerate(names):
+            shape = tensor_shapes[name]
+            shape[0] = 1
+            tensor = predictor.get_input_tensor(name)
+            tensor.copy_from_cpu(list(self.feeds.values())[i])
+
+        predictor.zero_copy_run()
+
+        output_names = predictor.get_output_names()
+        outs = [
+            predictor.get_output_tensor(out_name).copy_to_cpu()
+            for out_name in output_names
+        ]
+
+        return outs
+
+    def _get_analysis_config(self,
+                             use_gpu=False,
+                             use_trt=False,
+                             use_mkldnn=False):
+        '''
+        Return a new object of AnalysisConfig.
+        '''
+        config = AnalysisConfig(
+            os.path.join(self.path, "model"), os.path.join(self.path, "params"))
+        config.disable_gpu()
+        config.switch_specify_input_names(True)
+        config.switch_ir_optim(True)
+        config.switch_use_feed_fetch_ops(False)
+        if use_gpu:
+            config.enable_use_gpu(100, 0)
+        if use_trt:
+            config.enable_tensorrt_engine(
+                self.trt_parameters.workspace_size,
+                self.trt_parameters.max_batch_size,
+                self.trt_parameters.min_subgraph_size,
+                self.trt_parameters.precision,
+                self.trt_parameters.use_static,
+                self.trt_parameters.use_calib_mode)
+        elif use_mkldnn:
+            config.enable_mkldnn()
+
+        return config
+
+    def check_output(self, atol=1e-5):
+        '''
+        Check whether calculating on CPU and GPU, enable TensorRT
+        or disable TensorRT, enable MKLDNN or disable MKLDNN
+        are all the same.
+        '''
+        self.assertFalse(self.feeds is None,
+                         "The inputs of the model is None. ")
+        use_gpu = self._get_place()
+        for place_ in use_gpu:
+            self.check_output_with_option(place_, atol)
+
+    def check_output_with_option(self, use_gpu, atol=1e-5):
+        '''
+        Check whether calculating on CPU and GPU, enable TensorRT
+        or disable TensorRT, enable MKLDNN or disable MKLDNN
+        are all the same.
+        '''
+        place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
+        executor = fluid.Executor(place)
+        device = "GPU" if use_gpu else "CPU"
+        executor.run(self.startup_program)
+        outs = self._save_models(executor, self.main_program)
+
+        analysis_outputs = self._get_analysis_outputs(
+            self._get_analysis_config(use_gpu=use_gpu))
+
+        # Check whether the results calculated on CPU and on GPU are the same.
+        self.assertTrue(
+            len(outs) == len(analysis_outputs),
+            "The number of outputs is different between inference and training forward at {}".
+            format(device))
+
+        for out, analysis_output in zip(outs, analysis_outputs):
+            self.assertTrue(
+                np.allclose(
+                    np.array(out), analysis_output, atol=atol),
+                "Output has diff between inference and training forward at {} ".
+                format(device))
+
+        # Check whether the trt results and the GPU results are the same.
+        if use_gpu and self.enable_trt:
+            tensorrt_outputs = self._get_analysis_outputs(
+                self._get_analysis_config(
+                    use_gpu=use_gpu, use_trt=self.enable_trt))
+
+            self.assertTrue(
+                len(tensorrt_outputs) == len(outs),
+                "The number of outputs is different between GPU and TensorRT. ")
+
+            for out, tensorrt_output in zip(outs, tensorrt_outputs):
+                self.assertTrue(
+                    np.allclose(
+                        np.array(out), tensorrt_output, atol=atol),
+                    "Output has diff between GPU and TensorRT. ")
+
+        # Check whether the mkldnn results and the CPU results are the same.
+        if (not use_gpu) and self.enable_mkldnn:
+            mkldnn_outputs = self._get_analysis_outputs(
+                self._get_analysis_config(
+                    use_gpu=use_gpu, use_mkldnn=self.enable_mkldnn))
+
+            self.assertTrue(
+                len(outs) == len(mkldnn_outputs),
+                "The number of outputs is different between CPU and MKLDNN. ")
+
+            for out, mkldnn_output in zip(outs, mkldnn_outputs):
+                self.assertTrue(
+                    np.allclose(
+                        np.array(out), mkldnn_output, atol=atol),
+                    "Output has diff between CPU and MKLDNN. ")
+
+    class TensorRTParam:
+        '''
+        Prepare TensorRT subgraph engine parameters.
+        '''
+
+        def __init__(self, workspace_size, max_batch_size, min_subgraph_size,
+                     precision, use_static, use_calib_mode):
+            self.workspace_size = workspace_size
+            self.max_batch_size = max_batch_size
+            self.min_subgraph_size = min_subgraph_size
+            self.precision = precision
+            self.use_static = use_static
+            self.use_calib_mode = use_calib_mode
+
+    class LiteParam:
+        '''
+        Prepare Lite subgraph engine parameters.
+        '''
+
+        def __init__(self, precision, passes_filter, ops_filter):
+            self.precision = precision
+            self.passes_filter = passes_filter
+            self.ops_filter = ops_filter
diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_conv_relu_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_conv_relu_fuse_pass.py
new file mode 100644
index 0000000000000000000000000000000000000000..2346e93d64dce21d9bdd7687bd8d5ed38ff5f188
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_conv_relu_fuse_pass.py
@@ -0,0 +1,45 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import unittest
+import numpy as np
+from inference_pass_test import InferencePassTest
+import paddle.fluid as fluid
+import paddle.fluid.core as core
+from paddle.fluid.core import AnalysisConfig
+
+
+class ConvBnFusePassMKLDNNTest(InferencePassTest):
+    def setUp(self):
+        with fluid.program_guard(self.main_program, self.startup_program):
+            data = fluid.data(
+                name="data", shape=[-1, 3, 100, 100], dtype="float32")
+            conv_out = fluid.layers.conv2d(
+                data, num_filters=3, filter_size=3, bias_attr=False, act="relu")
+
+        self.feeds = {
+            "data": np.random.random((1, 3, 100, 100)).astype("float32")
+        }
+        self.fetch_list = [conv_out]
+        self.enable_mkldnn = True
+
+    def test_check_output(self):
+        use_gpu = False
+        self.check_output_with_option(use_gpu)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_fc_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_fc_fuse_pass.py
new file mode 100644
index 0000000000000000000000000000000000000000..f66822171cb58a7671ecffc294a8386c6e42ebc4
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_fc_fuse_pass.py
@@ -0,0 +1,53 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import unittest
+import numpy as np
+from inference_pass_test import InferencePassTest
+import paddle.fluid as fluid
+import paddle.fluid.core as core
+from paddle.fluid.core import AnalysisConfig
+
+
+class FCFusePassTRTTest(InferencePassTest):
+    def setUp(self):
+        with fluid.program_guard(self.main_program, self.startup_program):
+            data = fluid.data(name="data", shape=[32, 128], dtype="float32")
+            fc_out1 = fluid.layers.fc(input=data,
+                                      size=128,
+                                      num_flatten_dims=1,
+                                      act="relu")
+            fc_out2 = fluid.layers.fc(input=fc_out1,
+                                      size=32,
+                                      num_flatten_dims=1)
+            out = fluid.layers.softmax(input=fc_out2)
+
+        self.feeds = {"data": np.random.random((32, 128)).astype("float32")}
+        self.enable_trt = True
+        self.trt_parameters = FCFusePassTRTTest.TensorRTParam(
+            1 << 20, 1, 3, AnalysisConfig.Precision.Float32, False, False)
+        self.fetch_list = [out]
+
+    def test_check_output(self):
+        use_gpu = [False]
+        if core.is_compiled_with_cuda():
+            use_gpu.append(True)
+        for i in range(len(use_gpu)):
+            self.check_output_with_option(use_gpu[i])
+
+
+if __name__ == "__main__":
+    unittest.main()