Unverified · Commit bfd0faef authored by Z zqw_1997, committed by GitHub

remove fluid.layers.mul (#49114)

* remove fluid.mul

* remove mul in __all__
Parent 5c9feef1
@@ -73,7 +73,6 @@ __all__ = [
'lod_reset',
'clip',
'clip_by_norm',
- 'mul',
'merge_selected_rows',
'get_tensor_from_selected_rows',
]
@@ -1682,64 +1681,6 @@ def merge_selected_rows(x, name=None):
return out
def mul(x, y, x_num_col_dims=1, y_num_col_dims=1, name=None):
"""
Mul Operator.
This operator is used to perform matrix multiplication for input $x$ and $y$.
The equation is:
.. math::
Out = x * y
Both the input $x$ and $y$ can carry the LoD (Level of Details) information, or not. But the output only shares the LoD information with input $x$.
Args:
x (Variable): The first input Tensor/LoDTensor of mul_op.
y (Variable): The second input Tensor/LoDTensor of mul_op.
x_num_col_dims (int, optional): The mul_op can take tensors with more than two dimensions as its inputs. If the input $x$ is a tensor with more than two dimensions, $x$ is flattened into a two-dimensional matrix first. The flattening rule is: the first `x_num_col_dims` dimensions are flattened to form the first dimension of the final matrix (the height of the matrix), and the remaining `rank(x) - x_num_col_dims` dimensions are flattened to form the second dimension (the width of the matrix). As a result, the height of the flattened matrix equals the product of $x$'s first `x_num_col_dims` dimensions' sizes, and the width equals the product of $x$'s last `rank(x) - x_num_col_dims` dimensions' sizes. For example, suppose $x$ is a 5-dimensional tensor with the shape [2, 3, 4, 5, 6], and `x_num_col_dims` = 3. Then the flattened matrix has the shape [2 x 3 x 4, 5 x 6] = [24, 30]. Default is 1.
y_num_col_dims (int, optional): The mul_op can take tensors with more than two dimensions as its inputs. If the input $y$ is a tensor with more than two dimensions, $y$ will be flattened into a two-dimensional matrix first. The attribute `y_num_col_dims` determines how $y$ is flattened. See comments of `x_num_col_dims` for more details. Default is 1.
name (str, optional): Name of the output. Normally there is no need for user to set this property. For more information, please refer to :ref:`api_guide_Name`. Default is None.
Returns:
Variable(Tensor/LoDTensor): The output Tensor/LoDTensor of mul op.
Examples:
.. code-block:: python
import paddle.fluid as fluid
import paddle
paddle.enable_static()
dataX = fluid.layers.data(name="dataX", append_batch_size = False, shape=[2, 5], dtype="float32")
dataY = fluid.layers.data(name="dataY", append_batch_size = False, shape=[5, 3], dtype="float32")
output = fluid.layers.mul(dataX, dataY,
x_num_col_dims = 1,
y_num_col_dims = 1)
"""
if _non_static_mode():
return _legacy_C_ops.mul(
x,
y,
'x_num_col_dims',
x_num_col_dims,
'y_num_col_dims',
y_num_col_dims,
)
inputs = {"X": [x], "Y": [y]}
attrs = {"x_num_col_dims": x_num_col_dims, "y_num_col_dims": y_num_col_dims}
helper = LayerHelper("mul", **locals())
check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], 'mul')
check_variable_and_dtype(y, 'y', ['float16', 'float32', 'float64'], 'mul')
out = helper.create_variable_for_type_inference(dtype=x.dtype)
helper.append_op(
type="mul", inputs={"X": x, "Y": y}, attrs=attrs, outputs={"Out": out}
)
return out
@templatedoc()
def get_tensor_from_selected_rows(x, name=None):
    """
......
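Migration note (not part of the original patch): callers that relied on `x_num_col_dims` / `y_num_col_dims` can usually reproduce the removed op's flattening behaviour by reshaping each input to 2-D and calling `paddle.matmul`. A minimal sketch, assuming static-mode inputs with fully known shapes; the helper name `mul_like` is hypothetical, and for higher-rank inputs the removed op may additionally reshape the result back to $x$'s leading dimensions, which this sketch does not do:

import numpy as np
import paddle

paddle.enable_static()

def mul_like(x, y, x_num_col_dims=1, y_num_col_dims=1):
    # Hypothetical stand-in for the removed fluid.layers.mul: flatten both
    # inputs to 2-D following the *_num_col_dims rule, then multiply.
    x_rows = int(np.prod(x.shape[:x_num_col_dims]))
    y_rows = int(np.prod(y.shape[:y_num_col_dims]))
    return paddle.matmul(
        paddle.reshape(x, [x_rows, -1]),
        paddle.reshape(y, [y_rows, -1]),
    )

# Same shapes as the docstring example above: [2, 5] x [5, 3] -> [2, 3]
dataX = paddle.static.data(name="dataX", shape=[2, 5], dtype="float32")
dataY = paddle.static.data(name="dataY", shape=[5, 3], dtype="float32")
output = mul_like(dataX, dataY)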
@@ -383,7 +383,7 @@ class TestDistOpCost(unittest.TestCase):
[None, "x"],
)
- out1 = paddle.fluid.layers.mul(out, param1) # [8, 8] [-1, -1]
+ out1 = paddle.matmul(out, param1) # [8, 8] [-1, -1]
tmp_param = paddle.create_parameter(
[8, 8], paddle.float32
) # [8, 8] [-1, -1]
@@ -393,10 +393,8 @@ class TestDistOpCost(unittest.TestCase):
[None, None],
)
- tmp_out = paddle.fluid.layers.mul(out1, tmp_param)
- out2 = paddle.fluid.layers.mul(
-     tmp_out, param2
- ) # [8, 4] [-1, 0]
+ tmp_out = paddle.matmul(out1, tmp_param)
+ out2 = paddle.matmul(tmp_out, param2) # [8, 4] [-1, 0]
out8 = paddle.transpose(out2, [1, 0]) # [4, 8] [0, -1]
......
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import numpy as np
import paddle
import paddle.static
from paddle.fluid.tests.unittests.ipu.op_test_ipu import IPUOpTest
class TestBase(IPUOpTest):
def setUp(self):
self.set_atol()
self.set_training()
self.set_data_feed()
self.set_feed_attr()
self.set_op_attrs()
def set_data_feed(self):
x = np.random.uniform(size=[2, 5])
y = np.random.uniform(size=[5, 3])
self.feed_fp32 = {"x": x.astype(np.float32), "y": y.astype(np.float32)}
self.feed_fp16 = {"x": x.astype(np.float16), "y": y.astype(np.float16)}
def set_feed_attr(self):
self.feed_shape = [x.shape for x in self.feed_fp32.values()]
self.feed_list = list(self.feed_fp32.keys())
self.feed_dtype = [x.dtype for x in self.feed_fp32.values()]
def set_op_attrs(self):
self.attrs = {
"x_num_col_dims": 1,
"y_num_col_dims": 1,
}
@IPUOpTest.static_graph
def build_model(self):
x = paddle.static.data(
name=self.feed_list[0], shape=self.feed_shape[0], dtype='float32'
)
y = paddle.static.data(
name=self.feed_list[1], shape=self.feed_shape[1], dtype='float32'
)
out = paddle.fluid.layers.mul(x, y, **self.attrs)
self.fetch_list = [out.name]
def run_model(self, exec_mode):
self.run_op_test(exec_mode)
def test(self):
for m in IPUOpTest.ExecutionMode:
if not self.skip_mode(m):
self.build_model()
self.run_model(m)
self.check()
class TestCase1(TestBase):
def set_data_feed(self):
x = np.random.uniform(size=[1, 2, 5])
y = np.random.uniform(size=[5, 3])
self.feed_fp32 = {"x": x.astype(np.float32), "y": y.astype(np.float32)}
self.feed_fp16 = {"x": x.astype(np.float16), "y": y.astype(np.float16)}
def set_op_attrs(self):
self.attrs = {
"x_num_col_dims": 2,
"y_num_col_dims": 1,
}
class TestCase2(TestBase):
def set_data_feed(self):
x = np.random.uniform(size=[3, 4, 2, 9])
y = np.random.uniform(size=[3, 6, 1, 2, 3])
self.feed_fp32 = {"x": x.astype(np.float32), "y": y.astype(np.float32)}
self.feed_fp16 = {"x": x.astype(np.float16), "y": y.astype(np.float16)}
def set_op_attrs(self):
self.attrs = {
'x_num_col_dims': 2,
'y_num_col_dims': 2,
}
if __name__ == "__main__":
unittest.main()
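For reference (not part of the original patch), the attributes in TestCase2 follow the flattening rule described in the removed docstring. A minimal dynamic-graph sketch of the same product expressed with paddle.reshape and paddle.matmul, with illustrative variable names:

import numpy as np
import paddle

paddle.disable_static()  # run eagerly for this quick shape check

# With x_num_col_dims=2 and y_num_col_dims=2, the removed mul op flattens
# x: [3, 4, 2, 9]    -> [3*4, 2*9]   = [12, 18]
# y: [3, 6, 1, 2, 3] -> [3*6, 1*2*3] = [18, 6]
x = paddle.to_tensor(np.random.uniform(size=[3, 4, 2, 9]).astype("float32"))
y = paddle.to_tensor(np.random.uniform(size=[3, 6, 1, 2, 3]).astype("float32"))
out2d = paddle.matmul(paddle.reshape(x, [12, 18]), paddle.reshape(y, [18, 6]))
print(out2d.shape)  # [12, 6]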
@@ -33,7 +33,7 @@ class FusionGroupPassTest(PassTest):
# subgraph with only 1 op node
tmp_0 = self.feed_vars[0] * self.feed_vars[1]
- tmp_1 = layers.mul(tmp_0, self.feed_vars[2])
+ tmp_1 = paddle.matmul(tmp_0, self.feed_vars[2])
# subgraph with 2 op nodes
tmp_2 = paddle.nn.functional.relu(tmp_0 + tmp_1)
@@ -114,7 +114,7 @@ class FusionGroupPassInplaceTest(FusionGroupPassTest):
tmp_0 = self.feed_vars[0] - self.feed_vars[1]
tmp_1 = tmp_0 * self.feed_vars[2]
tmp_2 = layers.assign(tmp_1, output=tmp_0)
- tmp_3 = layers.mul(tmp_2, self.feed_vars[3])
+ tmp_3 = paddle.matmul(tmp_2, self.feed_vars[3])
self.num_fused_ops = 1
self.fetch_list = [tmp_3]
@@ -143,7 +143,7 @@ class FusionGroupPassTestCastAndFP16(FusionGroupPassTest):
# TODO(xreki): fix precision problem when using softmax of float16.
# tmp_2 = layers.softmax(tmp_1)
tmp_2 = paddle.add(tmp_1, zero)
- tmp_3 = layers.mul(tmp_0, self.feed_vars[2])
+ tmp_3 = paddle.matmul(tmp_0, self.feed_vars[2])
# subgraph with 4 op nodes
tmp_3 = layers.cast(tmp_2, dtype="float16")
tmp_4 = paddle.nn.functional.relu(tmp_1 + tmp_3)
@@ -169,7 +169,7 @@ class FusionGroupPassSumTest(FusionGroupPassTest):
[self.feed_vars[0], self.feed_vars[1], self.feed_vars[2]]
)
tmp_1 = paddle.sqrt(tmp_0)
- tmp_2 = layers.mul(tmp_0, self.feed_vars[3])
+ tmp_2 = paddle.matmul(tmp_0, self.feed_vars[3])
# subgraph with 2 op nodes
tmp_3 = paddle.square(paddle.add_n([tmp_1, tmp_2]))
......
@@ -214,322 +214,5 @@ class TestMul4FP16(TestMul4):
pass
class TestMulNet(unittest.TestCase):
def init_dtype(self):
self.dtype = np.float32
def _test(self, run_npu=True):
main_prog = paddle.static.Program()
startup_prog = paddle.static.Program()
main_prog.random_seed = SEED
startup_prog.random_seed = SEED
np.random.seed(SEED)
a_np = np.random.random(size=(2, 3)).astype(self.dtype)
b_np = np.random.random(size=(2, 3)).astype(self.dtype)
c_np = np.random.random(size=(3, 2)).astype(self.dtype)
d_np = np.random.random(size=(3, 2)).astype(self.dtype)
label_np = np.random.randint(2, size=(2, 1)).astype('int64')
with paddle.static.program_guard(main_prog, startup_prog):
a = paddle.static.data(name="a", shape=[2, 3], dtype=self.dtype)
b = paddle.static.data(name="b", shape=[2, 3], dtype=self.dtype)
c = paddle.static.data(name="c", shape=[3, 2], dtype=self.dtype)
d = paddle.static.data(name="d", shape=[3, 2], dtype=self.dtype)
label = paddle.static.data(
name="label", shape=[2, 1], dtype='int64'
)
sum_1 = paddle.add(a, b)
sum_2 = paddle.add(c, d)
result = paddle.fluid.layers.mul(sum_1, sum_2)
fc_1 = fluid.layers.fc(input=result, size=8)
prediction = fluid.layers.fc(input=fc_1, size=2, act='softmax')
cost = paddle.nn.functional.cross_entropy(input=prediction, label=label, reduction='none', use_softmax=False)
loss = paddle.mean(cost)
sgd = fluid.optimizer.SGD(learning_rate=0.01)
sgd.minimize(loss)
if run_npu:
place = paddle.NPUPlace(0)
else:
place = paddle.CPUPlace()
exe = paddle.static.Executor(place)
exe.run(startup_prog)
print("TestMulNet Start run on {} . ".format(place))
for epoch in range(100):
pred_res, loss_res = exe.run(
main_prog,
feed={
"a": a_np,
"b": b_np,
"c": c_np,
"d": d_np,
"label": label_np,
},
fetch_list=[prediction, loss],
)
if epoch % 10 == 0:
print(
"Epoch {} | Prediction[0]: {}, Loss: {}".format(
epoch, pred_res[0], loss_res
)
)
return pred_res, loss_res
def test_npu(self):
self.init_dtype()
cpu_pred, cpu_loss = self._test(False)
npu_pred, npu_loss = self._test(True)
np.testing.assert_allclose(npu_pred, cpu_pred, rtol=1e-6)
np.testing.assert_allclose(npu_loss, cpu_loss, rtol=1e-6)
class TestMulNet3_2(unittest.TestCase):
def init_dtype(self):
self.dtype = np.float32
def _test(self, run_npu=True):
main_prog = paddle.static.Program()
startup_prog = paddle.static.Program()
main_prog.random_seed = SEED
startup_prog.random_seed = SEED
np.random.seed(SEED)
a_np = np.random.random(size=(2, 3, 4)).astype(self.dtype)
b_np = np.random.random(size=(2, 3, 4)).astype(self.dtype)
c_np = np.random.random(size=(12, 5)).astype(self.dtype)
d_np = np.random.random(size=(12, 5)).astype(self.dtype)
label_np = np.random.randint(2, size=(2, 1)).astype('int64')
with paddle.static.program_guard(main_prog, startup_prog):
a = paddle.static.data(name="a", shape=[2, 3, 4], dtype=self.dtype)
b = paddle.static.data(name="b", shape=[2, 3, 4], dtype=self.dtype)
c = paddle.static.data(name="c", shape=[12, 5], dtype=self.dtype)
d = paddle.static.data(name="d", shape=[12, 5], dtype=self.dtype)
label = paddle.static.data(
name="label", shape=[2, 1], dtype='int64'
)
sum_1 = paddle.add(a, b)
sum_2 = paddle.add(c, d)
result = paddle.fluid.layers.mul(sum_1, sum_2)
fc_1 = fluid.layers.fc(input=result, size=8)
prediction = fluid.layers.fc(input=fc_1, size=2, act='softmax')
cost = paddle.nn.functional.cross_entropy(input=prediction, label=label, reduction='none', use_softmax=False)
loss = paddle.mean(cost)
sgd = fluid.optimizer.SGD(learning_rate=0.01)
sgd.minimize(loss)
if run_npu:
place = paddle.NPUPlace(0)
else:
place = paddle.CPUPlace()
exe = paddle.static.Executor(place)
exe.run(startup_prog)
print("testMulNet3_2 tart run on {}".format(place))
for epoch in range(100):
pred_res, loss_res = exe.run(
main_prog,
feed={
"a": a_np,
"b": b_np,
"c": c_np,
"d": d_np,
"label": label_np,
},
fetch_list=[prediction, loss],
)
if epoch % 10 == 0:
print(
"Epoch {} | Prediction[0]: {}, Loss: {}".format(
epoch, pred_res[0], loss_res
)
)
return pred_res, loss_res
def test_npu(self):
self.init_dtype()
cpu_pred, cpu_loss = self._test(False)
npu_pred, npu_loss = self._test(True)
np.testing.assert_allclose(
npu_pred, cpu_pred, atol=1e-5
) # atol needed on cann 20.3
np.testing.assert_allclose(npu_loss, cpu_loss, atol=1e-5)
class TestMulNet3_2_xc2(unittest.TestCase):
def init_dtype(self):
self.dtype = np.float32
def _test(self, run_npu=True):
main_prog = paddle.static.Program()
startup_prog = paddle.static.Program()
main_prog.random_seed = SEED
startup_prog.random_seed = SEED
np.random.seed(SEED)
a_np = np.random.random(size=(2, 3, 4)).astype(self.dtype)
b_np = np.random.random(size=(2, 3, 4)).astype(self.dtype)
c_np = np.random.random(size=(4, 5)).astype(self.dtype)
d_np = np.random.random(size=(4, 5)).astype(self.dtype)
label_np = np.random.randint(2, size=(2, 1)).astype('int64')
with paddle.static.program_guard(main_prog, startup_prog):
a = paddle.static.data(name="a", shape=[2, 3, 4], dtype=self.dtype)
b = paddle.static.data(name="b", shape=[2, 3, 4], dtype=self.dtype)
c = paddle.static.data(name="c", shape=[4, 5], dtype=self.dtype)
d = paddle.static.data(name="d", shape=[4, 5], dtype=self.dtype)
label = paddle.static.data(
name="label", shape=[2, 1], dtype='int64'
)
sum_1 = paddle.add(a, b)
sum_2 = paddle.add(c, d)
result = paddle.fluid.layers.mul(sum_1, sum_2, x_num_col_dims=2)
result_re = paddle.reshape(result, shape=[2, 15])
fc_1 = fluid.layers.fc(input=result_re, size=8)
prediction = fluid.layers.fc(input=fc_1, size=2, act='softmax')
cost = paddle.nn.functional.cross_entropy(input=prediction, label=label, reduction='none', use_softmax=False)
loss = paddle.mean(cost)
sgd = fluid.optimizer.SGD(learning_rate=0.01)
sgd.minimize(loss)
if run_npu:
place = paddle.NPUPlace(0)
else:
place = paddle.CPUPlace()
exe = paddle.static.Executor(place)
exe.run(startup_prog)
print("TestMulNet3_2_xc2. Start run on {}".format(place))
for epoch in range(100):
pred_res, loss_res = exe.run(
main_prog,
feed={
"a": a_np,
"b": b_np,
"c": c_np,
"d": d_np,
"label": label_np,
},
fetch_list=[prediction, loss],
)
if epoch % 10 == 0:
print(
"Epoch {} | Prediction[0]: {}, Loss: {}".format(
epoch, pred_res[0], loss_res
)
)
return pred_res, loss_res
def test_npu(self):
self.init_dtype()
cpu_pred, cpu_loss = self._test(False)
npu_pred, npu_loss = self._test(True)
np.testing.assert_allclose(npu_pred, cpu_pred, rtol=1e-6)
np.testing.assert_allclose(npu_loss, cpu_loss, rtol=1e-6)
class TestMulNet4_2(unittest.TestCase):
def init_dtype(self):
self.dtype = np.float32
def _test(self, run_npu=True):
main_prog = paddle.static.Program()
startup_prog = paddle.static.Program()
main_prog.random_seed = SEED
startup_prog.random_seed = SEED
np.random.seed(SEED)
a_np = np.random.random(size=(12, 5)).astype(self.dtype)
b_np = np.random.random(size=(12, 5)).astype(self.dtype)
c_np = np.random.random(size=(12, 5)).astype(self.dtype)
d_np = np.random.random(size=(12, 5)).astype(self.dtype)
label_np = np.random.randint(2, size=(2, 1)).astype('int64')
with paddle.static.program_guard(main_prog, startup_prog):
a = paddle.static.data(name="a", shape=[12, 5], dtype=self.dtype)
b = paddle.static.data(name="b", shape=[12, 5], dtype=self.dtype)
c = paddle.static.data(name="c", shape=[12, 5], dtype=self.dtype)
d = paddle.static.data(name="d", shape=[12, 5], dtype=self.dtype)
label = paddle.static.data(
name="label", shape=[2, 1], dtype='int64'
)
sum_1 = paddle.add(a, b) # [12, 5]
sum_2 = paddle.add(c, d) # [12, 5]
fc_1 = fluid.layers.fc(input=sum_1, size=2) # [12, 2]
fc_1_re_shape = paddle.reshape(fc_1, shape=[2, 3, 2, 2])
fc_2 = fluid.layers.fc(input=sum_2, size=2) # [12, 2]
result = paddle.fluid.layers.mul(
fc_1_re_shape, fc_2
) # [2, 3, 2, 2] * [12, 2]
prediction = fluid.layers.fc(input=result, size=2, act='softmax')
cost = paddle.nn.functional.cross_entropy(input=prediction, label=label, reduction='none', use_softmax=False)
loss = paddle.mean(cost)
sgd = fluid.optimizer.SGD(learning_rate=0.01)
sgd.minimize(loss)
if run_npu:
place = paddle.NPUPlace(0)
else:
place = paddle.CPUPlace()
exe = paddle.static.Executor(place)
exe.run(startup_prog)
print("testMulNet4_2 tart run on {}".format(place))
for epoch in range(100):
pred_res, loss_res = exe.run(
main_prog,
feed={
"a": a_np,
"b": b_np,
"c": c_np,
"d": d_np,
"label": label_np,
},
fetch_list=[prediction, loss],
)
if epoch % 10 == 0:
print(
"Epoch {} | Prediction[0]: {}, Loss: {}".format(
epoch, pred_res[0], loss_res
)
)
return pred_res, loss_res
def test_npu(self):
self.init_dtype()
cpu_pred, cpu_loss = self._test(False)
npu_pred, npu_loss = self._test(True)
np.testing.assert_allclose(
npu_pred, cpu_pred, atol=1e-5
) # atol needed on cann 20.3
np.testing.assert_allclose(npu_loss, cpu_loss, atol=1e-5)
if __name__ == '__main__':
unittest.main()
@@ -71,7 +71,7 @@ class MLPLayer(nn.Layer):
out = self.linear1(out)
param = paddle.create_parameter([1024, 4096], paddle.float32)
auto.shard_tensor(param, PP_MESH_1, [None, "y"])
- out = paddle.fluid.layers.mul(out, param)
+ out = paddle.matmul(out, param)
return out
......
@@ -81,7 +81,7 @@ class MLPLayer(nn.Layer):
out = self.linear0(w_out)
param = paddle.create_parameter([4096, 4096], paddle.float32)
auto.shard_tensor(param, PP_MESH_0, ["x", None])
- out = paddle.fluid.layers.mul(out, param)
+ out = paddle.matmul(out, param)
gelu_out = F.gelu(out, approximate=True)
out = self.linear1(gelu_out)
out1 = self.linear2(gelu_out)
......
@@ -18,7 +18,6 @@ import numpy as np
import paddle
import paddle.fluid as fluid
- import paddle.fluid.layers as layers
from paddle.fluid.backward import calc_gradient
@@ -29,7 +28,7 @@ class TestCalcGradient(unittest.TestCase):
with fluid.program_guard(main, startup):
x = paddle.create_parameter(dtype="float32", shape=[5, 10])
y = paddle.create_parameter(dtype="float32", shape=[10, 8])
- mul_out = layers.mul(x=x, y=y)
+ mul_out = paddle.matmul(x=x, y=y)
mean_out = paddle.mean(mul_out)
a = calc_gradient(mean_out, mul_out)
b = calc_gradient(mean_out, x)
......
@@ -18,7 +18,7 @@ import numpy as np
import paddle
from paddle.fluid.executor import Executor
- from paddle.fluid.layers import data, mul, zeros
+ from paddle.fluid.layers import data, zeros
from paddle.tensor import array_write
@@ -35,7 +35,7 @@ class TestExecutor(unittest.TestCase):
array_write(x=b, i=i, array=array)
i = paddle.increment(i)
- out = mul(x=a, y=b)
+ out = paddle.matmul(x=a, y=b)
array_write(x=out, i=i, array=array)
a_np = np.random.random((100, 784)).astype('float32')
......
@@ -17,8 +17,8 @@ import unittest
import numpy as np
from test_eager_deletion_padding_rnn import PaddingRNNTestBase, RNNConfig
+ import paddle
import paddle.fluid as fluid
- import paddle.fluid.core as core
class TestExecutor(unittest.TestCase):
@@ -33,14 +33,14 @@ class TestExecutor(unittest.TestCase):
dtype='float32',
append_batch_size=False,
)
- output = fluid.layers.mul(x=a, y=b)
+ output = paddle.matmul(x=a, y=b)
# Compute with numpy
a_np = np.random.random((100, 784)).astype('float32')
b_np = np.random.random((784, 100)).astype('float32')
out_np = np.dot(a_np, b_np)
- place = core.CPUPlace()
+ place = paddle.CPUPlace()
exe = fluid.Executor(place)
def _train(use_program_cache, max_iters=1):
......
@@ -45,7 +45,7 @@ class AutoPruneLayer0(fluid.Layer):
def forward(self, x, y):
a = self.linear1(x)
b = self.linear2(y)
- c = fluid.layers.mul(a, b)
+ c = paddle.matmul(a, b)
d = paddle.mean(c)
return d
@@ -74,7 +74,7 @@ class AutoPruneLayer1(fluid.Layer):
a = self.linear1(x)
b = self.linear2(y)
b.stop_gradient = True
- c = fluid.layers.mul(a, b)
+ c = paddle.matmul(a, b)
d = paddle.mean(c)
return d
......
@@ -28,7 +28,7 @@ class RecurrentTest(fluid.Layer):
super().__init__(name_scope)
def forward(self, in1, in2):
- out = fluid.layers.mul(in1, in2)
+ out = paddle.matmul(in1, in2)
sum_out = paddle.sum(out)
return sum_out, out
......
@@ -21,58 +21,10 @@ from decorator_helper import prog_scope
import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
- import paddle.fluid.layers as layers
paddle.enable_static()
class TestMulGradCheck(unittest.TestCase):
@prog_scope()
def func(self, place):
prog = fluid.Program()
with fluid.program_guard(prog):
x = paddle.create_parameter(dtype="float64", shape=[2, 8], name='x')
y = paddle.create_parameter(dtype="float64", shape=[8, 4], name='y')
z = layers.mul(x=x, y=y)
gradient_checker.grad_check([x, y], z, place=place)
def test_grad(self):
places = [fluid.CPUPlace()]
if core.is_compiled_with_cuda():
places.append(fluid.CUDAPlace(0))
for p in places:
self.func(p)
class TestMulDoubleGradCheck(unittest.TestCase):
@prog_scope()
def func(self, place):
# the shape of the input variable should be clearly specified and not include -1.
x_shape = [7, 11]
y_shape = [11, 9]
eps = 0.005
dtype = np.float64
x = layers.data('x', x_shape, False, dtype)
x.persistable = True
y = layers.data('y', y_shape, False, dtype)
y.persistable = True
out = layers.mul(x, y)
x_arr = np.random.uniform(-1, 1, x_shape).astype(dtype)
y_arr = np.random.uniform(-1, 1, y_shape).astype(dtype)
gradient_checker.double_grad_check(
[x, y], out, x_init=[x_arr, y_arr], place=place, eps=eps
)
def test_grad(self):
places = [fluid.CPUPlace()]
if core.is_compiled_with_cuda():
places.append(fluid.CUDAPlace(0))
for p in places:
self.func(p)
class TestMatmulDoubleGradCheck(unittest.TestCase):
def setUp(self):
self.init_test()
......
@@ -22,9 +22,6 @@ import paddle.fluid.core as core
sys.path.append("..")
from op_test import OpTest
- import paddle.fluid as fluid
- from paddle.fluid import Program, program_guard
class TestMulOp(OpTest):
def setUp(self):
@@ -57,23 +54,6 @@ class TestMulOp(OpTest):
)
class TestMulOpError(unittest.TestCase):
def test_errors(self):
with program_guard(Program(), Program()):
# The input type of mul_op must be Variable.
x1 = fluid.create_lod_tensor(
np.array([[-1]]), [[1]], fluid.CPUPlace()
)
x2 = fluid.create_lod_tensor(
np.array([[-1]]), [[1]], fluid.CPUPlace()
)
self.assertRaises(TypeError, fluid.layers.mul, x1, x2)
# The input dtype of mul_op must be float32 or float64.
x3 = fluid.layers.data(name='x3', shape=[4], dtype="int32")
x4 = fluid.layers.data(name='x4', shape=[4], dtype="int32")
self.assertRaises(TypeError, fluid.layers.mul, x3, x4)
class TestMulOp2(OpTest):
def setUp(self):
self.op_type = "mul"
......
@@ -22,9 +22,6 @@ import paddle
sys.path.append("..")
from op_test_xpu import XPUOpTest
- import paddle.fluid as fluid
- from paddle.fluid import Program, program_guard
paddle.enable_static()
from xpu.get_test_cover_info import (
@@ -34,23 +31,6 @@ from xpu.get_test_cover_info import (
)
class TestMulOpError(unittest.TestCase):
def test_errors(self):
with program_guard(Program(), Program()):
# The input type of mul_op must be Variable.
x1 = fluid.create_lod_tensor(
np.array([[-1]]), [[1]], fluid.XPUPlace(0)
)
x2 = fluid.create_lod_tensor(
np.array([[-1]]), [[1]], fluid.XPUPlace(0)
)
self.assertRaises(TypeError, fluid.layers.mul, x1, x2)
# The input dtype of mul_op must be float32.
x3 = fluid.layers.data(name='x3', shape=[4], dtype="int32")
x4 = fluid.layers.data(name='x4', shape=[4], dtype="int32")
self.assertRaises(TypeError, fluid.layers.mul, x3, x4)
class XPUTestMulOp(XPUOpTestWrapper):
def __init__(self):
self.op_name = 'mul'
......