diff --git a/paddle/fluid/framework/ir/map_matmul_to_mul_pass.cc b/paddle/fluid/framework/ir/map_matmul_to_mul_pass.cc index 865b556f301c0dfa57d05e142d6925ca9ca543f4..734f8957ad09e9aad66f6dcf82a47cf008ad02f9 100644 --- a/paddle/fluid/framework/ir/map_matmul_to_mul_pass.cc +++ b/paddle/fluid/framework/ir/map_matmul_to_mul_pass.cc @@ -169,9 +169,6 @@ Flatten2MatmulFusePass::Flatten2MatmulFusePass() { .AddInput("X") .IsTensor() .End() - .AddInput("Y") - .IsTensor() - .End() .AddOutput("Out") .IsTensor() .End() @@ -179,7 +176,7 @@ Flatten2MatmulFusePass::Flatten2MatmulFusePass() { .IsTensor() .End() .AddAttr("axis") - .IsNumGE(0) + .IsNumEQ(1) .End(); AddOpCompat(OpCompat("mul")) @@ -222,7 +219,7 @@ Squeeze2MatmulFusePass::Squeeze2MatmulFusePass() { .IsBoolEQ(false) .End(); - AddOpCompat(OpCompat("Squeeze2")) + AddOpCompat(OpCompat("squeeze2")) .AddInput("X") .IsTensor() .End() @@ -593,10 +590,10 @@ Reshape2MatmulFusePass::Reshape2MatmulFusePass() { .IsNumLT(1.00001f) .End() .AddAttr("transpose_X") - .IsBoolEQ("False") + .IsBoolEQ(false) .End() .AddAttr("transpose_Y") - .IsBoolEQ("False") + .IsBoolEQ(false) .End(); AddOpCompat(OpCompat("mul")) diff --git a/python/paddle/fluid/tests/unittests/ir/inference/CMakeLists.txt b/python/paddle/fluid/tests/unittests/ir/inference/CMakeLists.txt index c4e5d98b80a8611dea4a54dd926346ca854b11f6..75d182bd01e7492ae7132e699900b006fd19c32a 100755 --- a/python/paddle/fluid/tests/unittests/ir/inference/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/ir/inference/CMakeLists.txt @@ -83,9 +83,11 @@ if (WITH_MKLDNN AND TENSORRT_FOUND AND WITH_GPU) set_tests_properties(test_conv_act_mkldnn_fuse_pass PROPERTIES TIMEOUT 120) set_tests_properties(test_conv_elementwise_add2_act_fuse_pass PROPERTIES TIMEOUT 120) set_tests_properties(test_conv_elementwise_add_act_fuse_pass PROPERTIES TIMEOUT 120) - set_tests_properties(test_conv_elementwise_add_act_fuse_pass PROPERTIES TIMEOUT 90) set_tests_properties(test_matmul_scale_fuse_pass PROPERTIES 
TIMEOUT 60) set_tests_properties(test_matmul_v2_scale_fuse_pass PROPERTIES TIMEOUT 60) + set_tests_properties(test_flatten2_matmul_fuse_pass PROPERTIES TIMEOUT 240) + set_tests_properties(test_squeeze2_matmul_fuse_pass PROPERTIES TIMEOUT 240) + set_tests_properties(test_reshape2_matmul_fuse_pass PROPERTIES TIMEOUT 240) endif() if (WITH_MKLDNN) diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_flatten2_matmul_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_flatten2_matmul_fuse_pass.py new file mode 100644 index 0000000000000000000000000000000000000000..6cd9ae970bb588a4834ddcae5ed6b1f0c9bfaf92 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_flatten2_matmul_fuse_pass.py @@ -0,0 +1,181 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
from auto_scan_test import PassAutoScanTest, IgnoreReasons
from program_config import TensorConfig, ProgramConfig, OpConfig
import numpy as np
import paddle.inference as paddle_infer
from functools import partial
from typing import Optional, List, Callable, Dict, Any, Set
import unittest

import hypothesis
from hypothesis import given, settings, seed, example, assume, reproduce_failure
import hypothesis.strategies as st


class TestFlatten2MatmulFusePass(PassAutoScanTest):
    """Auto-scan test for ``flatten2_matmul_fuse_pass``.

    Graph under test::

        x_var
          |
        flatten2
           \\
        flatten2_out_var    y_var
              \\           /
                 matmul       bias_var
                    \\          /
                  elementwise_add

    After the pass runs, flatten2 + matmul are expected to fuse into a
    single ``mul`` op, leaving the graph [mul, elementwise_add].
    """

    def sample_predictor_configs(self, program_config):
        """Yield (config, expected fused ops, (atol, rtol)) triples.

        NOTE(review): TRT is deliberately not sampled here; the known TRT
        accuracy issue is handled via add_ignore_pass_case instead.
        """
        # CPU inference.
        cpu_cfg = self.create_inference_config(use_gpu=False)
        yield cpu_cfg, ["mul", "elementwise_add"], (1e-5, 1e-5)

        # GPU inference.
        gpu_cfg = self.create_inference_config(use_gpu=True)
        yield gpu_cfg, ["mul", "elementwise_add"], (1e-5, 1e-5)

    def add_ignore_pass_case(self):
        """Register skip rules for configurations with known bugs."""

        def skip_known_trt_issue(program_config, predictor_config):
            # On 3080, the results of MatMul and Mul differ on TRT when
            # the input Y is a weight, so skip every TRT config.
            if predictor_config.tensorrt_engine_enabled():
                return True

            # On TRT, Mul is converted to FC when Y is a weight; skip the
            # activation-input variant as well as any bias whose shape is
            # not exactly [out_size].
            weights = program_config.weights
            if "matmul_y" not in weights or "bias" not in weights:
                return True

            y_shape = list(weights["matmul_y"].shape)
            bias_shape = weights["bias"].shape
            axis = program_config.ops[2].attrs["axis"]
            return (axis == 0 or len(bias_shape) != 1 or
                    bias_shape[0] != y_shape[1])

        self.add_ignore_check_case(
            skip_known_trt_issue,
            IgnoreReasons.PASS_ACCURACY_ERROR,
            "The pass error on TRT while shape of bias is not [out_size].", )

    def sample_program_config(self, draw):
        """Draw one random (flatten2 -> matmul -> elementwise_add) program."""
        # 1. flatten2 input: random 4-D shape; axis == 1 collapses the
        # last three dims, i.e. [a, b, c, d] -> [a, b*c*d].
        x_shape = draw(
            st.lists(
                st.integers(
                    min_value=1, max_value=10), min_size=4, max_size=4))
        flatten_axis = 1
        flatten_shape = [x_shape[0], x_shape[1] * x_shape[2] * x_shape[3]]

        # 2. matmul attrs: only the pass-compatible values.
        alpha = 1.0
        transpose_X = False
        transpose_Y = False

        # 3. matmul Y: leading dim must equal the flattened inner size.
        y_shape = draw(
            st.lists(
                st.integers(
                    min_value=1, max_value=8), min_size=2, max_size=2))
        y_shape[0] = flatten_shape[1]

        # 4. elementwise_add: pick an axis and a broadcastable bias shape.
        axis = draw(st.integers(min_value=-1, max_value=1))
        if axis == 0:
            bias_shape = [flatten_shape[0]]
        elif axis == 1:
            bias_shape = [y_shape[1]]
        else:
            bias_shape = [flatten_shape[0], y_shape[1]]
            if draw(st.booleans()):
                bias_shape[1] = 1

        flatten2_op = OpConfig(
            "flatten2",
            inputs={"X": ["flatten2_x"]},
            axis=flatten_axis,
            outputs={"Out": ["flatten2_out"],
                     "XShape": ["xshape"]}, )
        matmul_op = OpConfig(
            "matmul",
            inputs={"X": ["flatten2_out"],
                    "Y": ["matmul_y"]},
            outputs={"Out": ["matmul_out"]},
            alpha=alpha,
            transpose_X=transpose_X,
            transpose_Y=transpose_Y,
            fused_reshape_X=[],
            fused_reshape_Y=[],
            fused_transpose_X=[],
            fused_transpose_Y=[],
            fused_reshape_Out=[],
            fused_transpose_Out=[], )
        add_op = OpConfig(
            "elementwise_add",
            inputs={"X": ["matmul_out"],
                    "Y": ["bias"]},
            outputs={"Out": ["add_out"]},
            axis=axis, )

        ops = [flatten2_op, matmul_op, add_op]

        # ~80% of cases feed Y/bias as weights (the fusible layout); the
        # rest feed them as runtime inputs to cover the non-fused path.
        if draw(st.integers(min_value=1, max_value=10)) <= 8:
            weights = {
                "matmul_y": TensorConfig(shape=y_shape),
                "bias": TensorConfig(shape=bias_shape),
            }
            inputs = {"flatten2_x": TensorConfig(shape=x_shape)}
        else:
            weights = {}
            inputs = {
                "flatten2_x": TensorConfig(shape=x_shape),
                "matmul_y": TensorConfig(shape=y_shape),
                "bias": TensorConfig(shape=bias_shape),
            }

        return ProgramConfig(
            ops=ops,
            weights=weights,
            inputs=inputs,
            outputs=ops[-1].outputs["Out"], )

    def test(self):
        self.run_and_statis(
            quant=False,
            max_examples=50,
            max_duration=1000,
            passes=["flatten2_matmul_fuse_pass"])


if __name__ == "__main__":
    unittest.main()
from auto_scan_test import PassAutoScanTest, IgnoreReasons
from program_config import TensorConfig, ProgramConfig, OpConfig
import numpy as np
import paddle.inference as paddle_infer
from functools import partial
from typing import Optional, List, Callable, Dict, Any, Set
import unittest

import hypothesis
from hypothesis import given, settings, seed, example, assume, reproduce_failure
import hypothesis.strategies as st


class TestReshape2MatmulFusePass(PassAutoScanTest):
    """Auto-scan test for ``reshape2_matmul_fuse_pass``.

    Graph under test::

        x_var
          |
        reshape2
           \\
        reshape2_out_var    y_var
              \\           /
                 matmul       bias_var
                    \\          /
                  elementwise_add

    After the pass runs, reshape2 + matmul are expected to fuse into a
    single ``mul`` op, leaving the graph [mul, elementwise_add].
    """

    def sample_predictor_configs(self, program_config):
        """Yield (config, expected fused ops, (atol, rtol)) triples.

        NOTE(review): TRT is deliberately not sampled here; the known TRT
        accuracy issue is handled via add_ignore_pass_case instead.
        """
        # CPU inference.
        cpu_cfg = self.create_inference_config(use_gpu=False)
        yield cpu_cfg, ["mul", "elementwise_add"], (1e-5, 1e-5)

        # GPU inference.
        gpu_cfg = self.create_inference_config(use_gpu=True)
        yield gpu_cfg, ["mul", "elementwise_add"], (1e-5, 1e-5)

    def add_ignore_pass_case(self):
        """Register skip rules for configurations with known bugs."""

        def skip_known_trt_issue(program_config, predictor_config):
            # On 3080, the results of MatMul and Mul differ on TRT when
            # the input Y is a weight, so skip every TRT config.
            if predictor_config.tensorrt_engine_enabled():
                return True

            # On TRT, Mul is converted to FC when Y is a weight; skip the
            # activation-input variant as well as any bias whose shape is
            # not exactly [out_size].
            weights = program_config.weights
            if "matmul_y" not in weights or "bias" not in weights:
                return True

            y_shape = list(weights["matmul_y"].shape)
            bias_shape = weights["bias"].shape
            axis = program_config.ops[2].attrs["axis"]
            return (axis == 0 or len(bias_shape) != 1 or
                    bias_shape[0] != y_shape[1])

        self.add_ignore_check_case(
            skip_known_trt_issue,
            IgnoreReasons.PASS_ACCURACY_ERROR,
            "The pass error on TRT while shape of bias is not [out_size].", )

    def sample_program_config(self, draw):
        """Draw one random (reshape2 -> matmul -> elementwise_add) program."""
        # 1. reshape2: target 2-D shape; the input carries two trailing
        # singleton dims, i.e. [a, b, 1, 1] -> [a, b].
        reshape = draw(
            st.lists(
                st.integers(
                    min_value=1, max_value=10), min_size=2, max_size=2))
        x_shape = reshape + [1, 1]

        # 2. matmul attrs: only the pass-compatible values.
        alpha = 1.0
        transpose_X = False
        transpose_Y = False

        # 3. matmul Y: leading dim must equal the reshaped inner size.
        y_shape = draw(
            st.lists(
                st.integers(
                    min_value=1, max_value=8), min_size=2, max_size=2))
        y_shape[0] = x_shape[1]

        # 4. elementwise_add: pick an axis and a broadcastable bias shape.
        axis = draw(st.integers(min_value=-1, max_value=1))
        if axis == 0:
            bias_shape = [x_shape[0]]
        elif axis == 1:
            bias_shape = [y_shape[1]]
        else:
            bias_shape = [x_shape[0], y_shape[1]]
            if draw(st.booleans()):
                bias_shape[1] = 1

        reshape2_op = OpConfig(
            "reshape2",
            inputs={"X": ["reshape2_x"]},
            shape=reshape,
            outputs={"Out": ["reshape2_out"],
                     "XShape": ["xshape"]}, )
        matmul_op = OpConfig(
            "matmul",
            inputs={"X": ["reshape2_out"],
                    "Y": ["matmul_y"]},
            outputs={"Out": ["matmul_out"]},
            alpha=alpha,
            transpose_X=transpose_X,
            transpose_Y=transpose_Y,
            fused_reshape_X=[],
            fused_reshape_Y=[],
            fused_transpose_X=[],
            fused_transpose_Y=[],
            fused_reshape_Out=[],
            fused_transpose_Out=[], )
        add_op = OpConfig(
            "elementwise_add",
            inputs={"X": ["matmul_out"],
                    "Y": ["bias"]},
            outputs={"Out": ["add_out"]},
            axis=axis, )

        ops = [reshape2_op, matmul_op, add_op]

        # ~80% of cases feed Y/bias as weights (the fusible layout); the
        # rest feed them as runtime inputs to cover the non-fused path.
        if draw(st.integers(min_value=1, max_value=10)) <= 8:
            weights = {
                "matmul_y": TensorConfig(shape=y_shape),
                "bias": TensorConfig(shape=bias_shape),
            }
            inputs = {"reshape2_x": TensorConfig(shape=x_shape)}
        else:
            weights = {}
            inputs = {
                "reshape2_x": TensorConfig(shape=x_shape),
                "matmul_y": TensorConfig(shape=y_shape),
                "bias": TensorConfig(shape=bias_shape),
            }

        return ProgramConfig(
            ops=ops,
            weights=weights,
            inputs=inputs,
            outputs=ops[-1].outputs["Out"], )

    def test(self):
        self.run_and_statis(
            quant=False,
            max_examples=50,
            max_duration=1000,
            passes=["reshape2_matmul_fuse_pass"])


if __name__ == "__main__":
    unittest.main()
from auto_scan_test import PassAutoScanTest, IgnoreReasons
from program_config import TensorConfig, ProgramConfig, OpConfig
import numpy as np
import paddle.inference as paddle_infer
from functools import partial
from typing import Optional, List, Callable, Dict, Any, Set
import unittest

import hypothesis
from hypothesis import given, settings, seed, example, assume, reproduce_failure
import hypothesis.strategies as st


class TestSqueeze2MatmulFusePass(PassAutoScanTest):
    """Auto-scan test for ``squeeze2_matmul_fuse_pass``.

    Graph under test::

        x_var
          |
        squeeze2
           \\
        squeeze2_out_var    y_var
              \\           /
                 matmul       bias_var
                    \\          /
                  elementwise_add

    After the pass runs, squeeze2 + matmul are expected to fuse into a
    single ``mul`` op, leaving the graph [mul, elementwise_add].
    """

    def sample_predictor_configs(self, program_config):
        """Yield (config, expected fused ops, (atol, rtol)) triples.

        NOTE(review): TRT is deliberately not sampled here; the known TRT
        accuracy issue is handled via add_ignore_pass_case instead.
        """
        # cpu
        config = self.create_inference_config(use_gpu=False)
        yield config, ["mul", "elementwise_add"], (1e-5, 1e-5)

        # for gpu
        config = self.create_inference_config(use_gpu=True)
        yield config, ["mul", "elementwise_add"], (1e-5, 1e-5)

    def add_ignore_pass_case(self):
        """Register skip rules for configurations with known bugs."""

        def teller1(program_config, predictor_config):
            # FIX: the original teller called
            # predictor_config.exp_disable_tensorrt_ops(["elementwise_add"])
            # here. A teller is a pure skip predicate and must not mutate
            # the predictor config, so that side effect has been removed.
            if predictor_config.tensorrt_engine_enabled():
                # On 3080, the results of MatMul and Mul are different
                # when the input Y is weight.
                return True

            # On TRT when the input Y is weight, Mul is converted to FC;
            # skip the activation-Y variant and any bias whose shape is
            # not [out_size].
            if "matmul_y" not in program_config.weights \
                    or "bias" not in program_config.weights:
                return True

            y_shape = list(program_config.weights["matmul_y"].shape)
            bias_shape = program_config.weights["bias"].shape
            axis = program_config.ops[2].attrs["axis"]
            # bias should be [mul_y_shape[-1]]
            if axis == 0 or bias_shape[0] != y_shape[1] or len(
                    bias_shape) != 1:
                return True
            return False

        self.add_ignore_check_case(
            teller1,
            IgnoreReasons.PASS_ACCURACY_ERROR,
            "The pass error on TRT while shape of bias is not [out_size].", )

    def sample_program_config(self, draw):
        """Draw one random (squeeze2 -> matmul -> elementwise_add) program.

        FIX: removed leftover debug overrides that pinned
        ``axis``/``x_shape``/``y_shape``/``bias_shape`` to one hard-coded
        case after the draws below — they made every hypothesis example
        identical and defeated the randomized search.
        """
        # 1. Shape of input:X of squeeze2; the trailing [1, 1] dims are
        # exactly the ones removed by axes == [2, 3].
        x_shape = draw(
            st.lists(
                st.integers(
                    min_value=1, max_value=8), min_size=2, max_size=2))
        x_shape += [1, 1]
        axes = [2, 3]

        # 2. matmul attrs: only the pass-compatible values.
        alpha = 1.0
        transpose_X = False
        transpose_Y = False

        # 3. matmul Y: leading dim must equal the squeezed inner size.
        y_shape = draw(
            st.lists(
                st.integers(
                    min_value=1, max_value=8), min_size=2, max_size=2))
        y_shape[0] = x_shape[1]

        # 4. elementwise_add: pick an axis and a bias shape that
        # broadcasts to the matmul output [x_shape[0], y_shape[1]].
        axis = draw(st.integers(min_value=-1, max_value=1))
        if axis == 0 or axis == -1:
            bias_shape = [x_shape[0], y_shape[1]]
        else:
            bias_shape = [y_shape[1], ]
        if draw(st.booleans()):
            bias_shape[-1] = 1
        if len(bias_shape) == 2 and draw(st.booleans()):
            bias_shape[0] = 1

        squeeze2_op = OpConfig(
            "squeeze2",
            inputs={"X": ["squeeze2_x"], },
            axes=axes,
            outputs={"Out": ["squeeze2_out"],
                     "XShape": ["xshape"]}, )
        matmul_op = OpConfig(
            "matmul",
            inputs={"X": ["squeeze2_out"],
                    "Y": ["matmul_y"]},
            outputs={"Out": ["matmul_out"]},
            alpha=alpha,
            transpose_X=transpose_X,
            transpose_Y=transpose_Y,
            fused_reshape_X=[],
            fused_reshape_Y=[],
            fused_transpose_X=[],
            fused_transpose_Y=[],
            fused_reshape_Out=[],
            fused_transpose_Out=[], )
        add_op = OpConfig(
            "elementwise_add",
            inputs={"X": ["matmul_out"],
                    "Y": ["bias"]},
            outputs={"Out": ["add_out"]},
            axis=axis, )

        ops = [squeeze2_op, matmul_op, add_op]

        # ~80% of cases feed Y/bias as weights (the fusible layout); the
        # rest feed them as runtime inputs to cover the non-fused path.
        if draw(st.integers(min_value=1, max_value=10)) <= 8:
            program_config = ProgramConfig(
                ops=ops,
                weights={
                    "matmul_y": TensorConfig(shape=y_shape),
                    "bias": TensorConfig(shape=bias_shape),
                },
                inputs={"squeeze2_x": TensorConfig(shape=x_shape), },
                outputs=ops[-1].outputs["Out"], )
        else:
            program_config = ProgramConfig(
                ops=ops,
                weights={},
                inputs={
                    "squeeze2_x": TensorConfig(shape=x_shape),
                    "matmul_y": TensorConfig(shape=y_shape),
                    "bias": TensorConfig(shape=bias_shape),
                },
                outputs=ops[-1].outputs["Out"], )
        return program_config

    def test(self):
        self.run_and_statis(
            quant=False,
            max_examples=50,
            max_duration=1000,
            passes=["squeeze2_matmul_fuse_pass"])


if __name__ == "__main__":
    unittest.main()