Unverified commit 539a9e60 authored by fuyou765, committed by GitHub

[MLU]add mlu kernel for tile op (#43389)

Parent 6a179e48
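For reference, tile repeats a tensor along each dimension by the factors in repeat_times, matching np.tile semantics (the tests below check against np.tile). A minimal usage sketch, not part of this commit, assuming a PaddlePaddle build compiled with MLU support where paddle.set_device('mlu:0') is available:

import numpy as np
import paddle

# Hypothetical environment: requires a build with PADDLE_WITH_MLU;
# 'mlu:0' selects the first MLU card.
paddle.set_device('mlu:0')

x = paddle.to_tensor(np.arange(6, dtype='float32').reshape([2, 3]))
out = paddle.tile(x, repeat_times=[2, 3])  # result shape: [4, 9]
assert np.array_equal(out.numpy(), np.tile(x.numpy(), (2, 3)))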
paddle/fluid/operators/tile_op_functor.h
@@ -30,6 +30,7 @@ inline std::vector<int> get_repeat_times(
   framework::Tensor cpu_repeat_tensor;
   if (platform::is_gpu_place(repeat_tensor->place()) ||
       platform::is_xpu_place(repeat_tensor->place()) ||
+      platform::is_mlu_place(repeat_tensor->place()) ||
       platform::is_npu_place(repeat_tensor->place())) {
     paddle::framework::TensorCopySync(*repeat_tensor, platform::CPUPlace(),
                                       &cpu_repeat_tensor);
@@ -49,6 +50,7 @@ inline std::vector<int> get_repeat_times(
     auto tensor = list_repeat_times_tensor[i];
     if (platform::is_gpu_place(tensor->place()) ||
         platform::is_xpu_place(tensor->place()) ||
+        platform::is_mlu_place(tensor->place()) ||
         platform::is_npu_place(tensor->place())) {
       framework::Tensor temp;
       paddle::framework::TensorCopySync(*tensor, platform::CPUPlace(), &temp);
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef PADDLE_WITH_MLU
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/mlu/mlu_baseop.h"
#include "paddle/fluid/operators/tile_op_functor.h"
namespace paddle {
namespace operators {
using Tensor = framework::Tensor;
template <typename T>
class TileMLUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
auto rank = context.Input<Tensor>("X")->dims().size();
PADDLE_ENFORCE_GE(
rank, 1,
platform::errors::InvalidArgument(
"The rank of the input 'x' for tile op must be a positive "
"integer, but the value received is %d.",
rank));
PADDLE_ENFORCE_LE(
rank, MAX_RANK_SUPPORTED,
platform::errors::InvalidArgument(
"The rank of the input 'x' for tile op "
"must be less than or equal to %d, but the value received is %d.",
MAX_RANK_SUPPORTED, rank));
auto repeat_times = get_repeat_times(context);
int repeat_times_size = repeat_times.size();
PADDLE_ENFORCE_GE(
repeat_times_size, 1,
platform::errors::InvalidArgument(
"The number of elements of the input 'repeat_times' for tile "
"op must be positive, but the value received is %d.",
repeat_times_size));
PADDLE_ENFORCE_LE(
repeat_times_size, MAX_RANK_SUPPORTED,
platform::errors::InvalidArgument(
"The number of elements of the input 'repeat_times' for tile op "
"must be less than or equal to %d, but the value received is %d.",
MAX_RANK_SUPPORTED, repeat_times_size));
auto* in0 = context.Input<framework::Tensor>("X");
auto in_dims = in0->dims();
for (size_t i = 0; i < repeat_times.size(); ++i) {
PADDLE_ENFORCE_GT(
repeat_times[i], 0,
platform::errors::InvalidArgument(
"All elements of the input 'repeat_times' for tile op must "
"be positive integers, but the value received is %d.",
repeat_times[i]));
}
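    // Align ranks: prepend 1s to the shorter of the input shape and
    // repeat_times so both vectors have the same length.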
auto vec_in_dims = phi::vectorize<int>(in_dims);
if (repeat_times.size() < vec_in_dims.size()) {
int diff = vec_in_dims.size() - repeat_times.size();
repeat_times.insert(repeat_times.begin(), diff, 1);
} else {
int diff = repeat_times.size() - vec_in_dims.size();
vec_in_dims.insert(vec_in_dims.begin(), diff, 1);
}
PADDLE_ENFORCE_EQ(
repeat_times.size(), vec_in_dims.size(),
platform::errors::InvalidArgument(
"The rank (%d) of the input 'x' and the rank (%d) of the input "
"'repeat_times' for tile op must match after promotion.",
vec_in_dims.size(), repeat_times.size()));
auto* out0 = context.Output<framework::Tensor>("Out");
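    // Fast path: if every repeat factor is 1, the output equals the input
    // and a plain tensor copy suffices.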
bool repeat_one_times = true;
for (size_t i = 0; i < repeat_times.size(); ++i) {
if (repeat_times[i] != 1) {
repeat_one_times = false;
}
}
if (repeat_one_times) {
paddle::framework::TensorCopy(*in0, context.GetPlace(), out0);
} else {
framework::DDim new_in_dims = phi::make_ddim(vec_in_dims);
framework::DDim out_dims(new_in_dims);
for (size_t i = 0; i < repeat_times.size(); ++i) {
out_dims[i] *= repeat_times[i];
}
out0->Resize(out_dims);
out0->mutable_data<T>(context.GetPlace());
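    // Lower tile to a broadcast: expand the rank-promoted input to the
    // element-wise product shape out_dims.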
MLUCnnlTensorDesc x_desc(*in0);
MLUCnnlTensorDesc out_desc(*out0);
MLUCnnl::BroadcastTo(context, x_desc.get(), GetBasePtr(in0),
out_desc.get(), GetBasePtr(out0));
}
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP_MLU_KERNEL(tile, ops::TileMLUKernel<bool>, ops::TileMLUKernel<int>,
ops::TileMLUKernel<int64_t>, ops::TileMLUKernel<float>);
#endif
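The kernel above needs no dedicated tile primitive: it left-pads the shorter of the input shape and repeat_times with 1s, multiplies the aligned shapes element-wise to get the output shape, and expands via MLUCnnl::BroadcastTo. A NumPy sketch of that shape logic (illustrative only; tile_via_broadcast is a made-up name, and np.tile stands in for the MLU broadcast):

import numpy as np

def tile_via_broadcast(x, repeat_times):
    # Mirror the kernel: pad the shorter of shape / repeat_times with 1s.
    shape = list(x.shape)
    repeat_times = list(repeat_times)
    if len(repeat_times) < len(shape):
        repeat_times = [1] * (len(shape) - len(repeat_times)) + repeat_times
    else:
        shape = [1] * (len(repeat_times) - len(shape)) + shape
    # Output dim i is shape[i] * repeat_times[i], as in the kernel's out_dims.
    return np.tile(x.reshape(shape), repeat_times)

x = np.arange(6).reshape(2, 3)
assert tile_via_broadcast(x, [2, 1, 4]).shape == (2, 2, 12)
assert np.array_equal(tile_via_broadcast(x, [2, 1, 4]), np.tile(x, (2, 1, 4)))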
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import sys
sys.path.append("..")
import unittest
import numpy as np
from op_test import OpTest
import paddle
import paddle.fluid as fluid
from paddle.fluid import compiler, Program, program_guard
# Situation 1: repeat_times is a list (without tensor)
class TestTileOpRank1(OpTest):
def setUp(self):
self.op_type = "tile"
self.place = paddle.device.MLUPlace(0)
self.__class__.use_mlu = True
self.init_data()
self.inputs = {'X': np.random.random(self.ori_shape).astype("float32")}
self.attrs = {'repeat_times': self.repeat_times}
output = np.tile(self.inputs['X'], self.repeat_times)
self.outputs = {'Out': output}
def init_data(self):
self.ori_shape = [100]
self.repeat_times = [2]
def test_check_output(self):
self.check_output_with_place(self.place)
def test_check_grad(self):
self.check_grad(['X'], 'Out')
# with dimension expanding
class TestTileOpRank2Expanding(TestTileOpRank1):
def init_data(self):
self.ori_shape = [120]
self.repeat_times = [2, 2]
class TestTileOpRank2(TestTileOpRank1):
def init_data(self):
self.ori_shape = [12, 14]
self.repeat_times = [2, 3]
class TestTileOpRank3_Corner(TestTileOpRank1):
def init_data(self):
self.ori_shape = (2, 10, 5)
self.repeat_times = (1, 1, 1)
class TestTileOpRank3_Corner2(TestTileOpRank1):
def init_data(self):
self.ori_shape = (2, 10, 5)
self.repeat_times = (2, 2)
class TestTileOpRank3(TestTileOpRank1):
def init_data(self):
self.ori_shape = (2, 4, 15)
self.repeat_times = (2, 1, 4)
class TestTileOpRank4(TestTileOpRank1):
def init_data(self):
self.ori_shape = (2, 4, 5, 7)
self.repeat_times = (3, 2, 1, 2)
# Situation 2: repeat_times is a list (with tensor)
class TestTileOpRank1_tensor_attr(OpTest):
def setUp(self):
self.op_type = "tile"
self.place = paddle.device.MLUPlace(0)
self.__class__.use_mlu = True
self.init_data()
repeat_times_tensor = []
for index, ele in enumerate(self.repeat_times):
repeat_times_tensor.append(("x" + str(index), np.ones(
(1)).astype('int32') * ele))
self.inputs = {
'X': np.random.random(self.ori_shape).astype("float32"),
'repeat_times_tensor': repeat_times_tensor,
}
self.attrs = {"repeat_times": self.infer_repeat_times}
output = np.tile(self.inputs['X'], self.repeat_times)
self.outputs = {'Out': output}
def init_data(self):
self.ori_shape = [100]
self.repeat_times = [2]
self.infer_repeat_times = [-1]
def test_check_output(self):
self.check_output_with_place(self.place)
def test_check_grad(self):
self.check_grad(['X'], 'Out')
class TestTileOpRank2_Corner_tensor_attr(TestTileOpRank1_tensor_attr):
def init_data(self):
self.ori_shape = [12, 14]
self.repeat_times = [1, 1]
self.infer_repeat_times = [1, -1]
class TestTileOpRank2_attr_tensor(TestTileOpRank1_tensor_attr):
def init_data(self):
self.ori_shape = [12, 14]
self.repeat_times = [2, 3]
self.infer_repeat_times = [-1, 3]
# Situation 3: repeat_times is a tensor
class TestTileOpRank1_tensor(OpTest):
def setUp(self):
self.op_type = "tile"
self.place = paddle.device.MLUPlace(0)
self.__class__.use_mlu = True
self.init_data()
self.inputs = {
'X': np.random.random(self.ori_shape).astype("float32"),
'RepeatTimes': np.array(self.repeat_times).astype("int32"),
}
self.attrs = {}
output = np.tile(self.inputs['X'], self.repeat_times)
self.outputs = {'Out': output}
def init_data(self):
self.ori_shape = [100]
self.repeat_times = [2]
def test_check_output(self):
self.check_output_with_place(self.place)
def test_check_grad(self):
self.check_grad(['X'], 'Out')
class TestTileOpRank2_tensor(TestTileOpRank1_tensor):
def init_data(self):
self.ori_shape = [12, 14]
self.repeat_times = [2, 3]
# Situation 4: input x is Integer
class TestTileOpInteger(OpTest):
def setUp(self):
self.op_type = "tile"
self.place = paddle.device.MLUPlace(0)
self.__class__.use_mlu = True
self.inputs = {
'X': np.random.randint(10, size=(4, 4, 5)).astype("int32")
}
self.attrs = {'repeat_times': [2, 1, 4]}
output = np.tile(self.inputs['X'], (2, 1, 4))
self.outputs = {'Out': output}
def test_check_output(self):
self.check_output_with_place(self.place)
# Situation 5: input x is Bool
class TestTileOpBoolean(OpTest):
def setUp(self):
self.op_type = "tile"
self.place = paddle.device.MLUPlace(0)
self.__class__.use_mlu = True
self.inputs = {'X': np.random.randint(2, size=(2, 4, 5)).astype("bool")}
self.attrs = {'repeat_times': [2, 1, 4]}
output = np.tile(self.inputs['X'], (2, 1, 4))
self.outputs = {'Out': output}
def test_check_output(self):
self.check_output_with_place(self.place)
# Situation 6: input x is Int64
class TestTileOpInt64_t(OpTest):
def setUp(self):
self.op_type = "tile"
self.place = paddle.device.MLUPlace(0)
self.__class__.use_mlu = True
self.inputs = {
'X': np.random.randint(10, size=(2, 4, 5)).astype("int64")
}
self.attrs = {'repeat_times': [2, 1, 4]}
output = np.tile(self.inputs['X'], (2, 1, 4))
self.outputs = {'Out': output}
def test_check_output(self):
self.check_output_with_place(self.place)
class TestTileError(unittest.TestCase):
def test_errors(self):
with program_guard(Program(), Program()):
x1 = fluid.create_lod_tensor(np.array([[-1]]), [[1]],
fluid.CPUPlace())
repeat_times = [2, 2]
self.assertRaises(TypeError, paddle.tile, x1, repeat_times)
x2 = fluid.layers.data(name='x2', shape=[4], dtype="uint8")
self.assertRaises(TypeError, paddle.tile, x2, repeat_times)
x3 = fluid.layers.data(name='x3', shape=[4], dtype="bool")
x3.stop_gradient = False
self.assertRaises(ValueError, paddle.tile, x3, repeat_times)
class TestTileAPIStatic(unittest.TestCase):
def test_api(self):
with program_guard(Program(), Program()):
repeat_times = [2, 2]
x1 = fluid.layers.data(name='x1', shape=[4], dtype="int32")
out = paddle.tile(x1, repeat_times)
positive_2 = fluid.layers.fill_constant([1], dtype="int32", value=2)
out2 = paddle.tile(x1, repeat_times=[positive_2, 2])
# Test Python API
class TestTileAPI(unittest.TestCase):
def test_api(self):
with fluid.dygraph.guard():
np_x = np.random.random([12, 14]).astype("float32")
x = paddle.to_tensor(np_x)
positive_2 = np.array([2]).astype("int32")
positive_2 = paddle.to_tensor(positive_2)
repeat_times = np.array([2, 3]).astype("int32")
repeat_times = paddle.to_tensor(repeat_times)
out_1 = paddle.tile(x, repeat_times=[2, 3])
out_2 = paddle.tile(x, repeat_times=[positive_2, 3])
out_3 = paddle.tile(x, repeat_times=repeat_times)
assert np.array_equal(out_1.numpy(), np.tile(np_x, (2, 3)))
assert np.array_equal(out_2.numpy(), np.tile(np_x, (2, 3)))
assert np.array_equal(out_3.numpy(), np.tile(np_x, (2, 3)))
if __name__ == "__main__":
paddle.enable_static()
unittest.main()