From 539a9e60006cc55978894d63fa81bbd1c775442c Mon Sep 17 00:00:00 2001
From: fuyou765 <64373205+fuyou765@users.noreply.github.com>
Date: Fri, 17 Jun 2022 17:53:32 +0800
Subject: [PATCH] [MLU]add mlu kernel for tile op (#43389)

---
 paddle/fluid/operators/tile_op_functor.h      |   2 +
 paddle/fluid/operators/tile_op_mlu.cc         | 112 +++++++
 .../tests/unittests/mlu/test_tile_op_mlu.py   | 281 ++++++++++++++++++
 3 files changed, 395 insertions(+)
 create mode 100644 paddle/fluid/operators/tile_op_mlu.cc
 create mode 100644 python/paddle/fluid/tests/unittests/mlu/test_tile_op_mlu.py

diff --git a/paddle/fluid/operators/tile_op_functor.h b/paddle/fluid/operators/tile_op_functor.h
index 95bfb9f4e1..92992d88c8 100644
--- a/paddle/fluid/operators/tile_op_functor.h
+++ b/paddle/fluid/operators/tile_op_functor.h
@@ -30,6 +30,7 @@ inline std::vector<int> get_repeat_times(
   framework::Tensor cpu_repeat_tensor;
   if (platform::is_gpu_place(repeat_tensor->place()) ||
       platform::is_xpu_place(repeat_tensor->place()) ||
+      platform::is_mlu_place(repeat_tensor->place()) ||
       platform::is_npu_place(repeat_tensor->place())) {
     paddle::framework::TensorCopySync(*repeat_tensor, platform::CPUPlace(),
                                       &cpu_repeat_tensor);
@@ -49,6 +50,7 @@ inline std::vector<int> get_repeat_times(
     auto tensor = list_repeat_times_tensor[i];
     if (platform::is_gpu_place(tensor->place()) ||
         platform::is_xpu_place(tensor->place()) ||
+        platform::is_mlu_place(tensor->place()) ||
         platform::is_npu_place(tensor->place())) {
       framework::Tensor temp;
       paddle::framework::TensorCopySync(*tensor, platform::CPUPlace(), &temp);
diff --git a/paddle/fluid/operators/tile_op_mlu.cc b/paddle/fluid/operators/tile_op_mlu.cc
new file mode 100644
index 0000000000..c7b3592f32
--- /dev/null
+++ b/paddle/fluid/operators/tile_op_mlu.cc
@@ -0,0 +1,112 @@
+/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#ifdef PADDLE_WITH_MLU
+
+#include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/operators/mlu/mlu_baseop.h"
+#include "paddle/fluid/operators/tile_op_functor.h"
+
+namespace paddle {
+namespace operators {
+
+using Tensor = framework::Tensor;
+
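+// TileMLUKernel validates the ranks of 'X' and 'repeat_times', promotes
+// both to a common rank, and then materializes the tiled output with a
+// single MLUCnnl::BroadcastTo call.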
+template <typename T>
+class TileMLUKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& context) const override {
+    auto rank = context.Input<Tensor>("X")->dims().size();
+    PADDLE_ENFORCE_GE(
+        rank, 1,
+        platform::errors::InvalidArgument(
+            "The rank of the input 'x' for tile op must be a positive "
+            "integer, but the value received is %d.",
+            rank));
+    PADDLE_ENFORCE_LE(
+        rank, MAX_RANK_SUPPORTED,
+        platform::errors::InvalidArgument(
+            "The rank of the input 'x' for tile op "
+            "must be less than or equal to %d, but the value received is %d.",
+            MAX_RANK_SUPPORTED, rank));
+    auto repeat_times = get_repeat_times(context);
+    int repeat_times_size = repeat_times.size();
+    PADDLE_ENFORCE_GE(
+        repeat_times_size, 1,
+        platform::errors::InvalidArgument(
+            "The number of elements of the input 'repeat_times' for tile "
+            "op must be positive, but the value received is %d.",
+            repeat_times_size));
+    PADDLE_ENFORCE_LE(
+        repeat_times_size, MAX_RANK_SUPPORTED,
+        platform::errors::InvalidArgument(
+            "The number of elements of the input 'repeat_times' for tile op "
+            "must be less than or equal to %d, but the value received is %d.",
+            MAX_RANK_SUPPORTED, repeat_times_size));
+
+    auto* in0 = context.Input<Tensor>("X");
+    auto in_dims = in0->dims();
+    for (size_t i = 0; i < repeat_times.size(); ++i) {
+      PADDLE_ENFORCE_GT(
+          repeat_times[i], 0,
+          platform::errors::InvalidArgument(
+              "All elements of the input 'repeat_times' for tile op must "
+              "be positive integers, but the value received is %d.",
+              repeat_times[i]));
+    }
+    auto vec_in_dims = phi::vectorize<int>(in_dims);
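+    // Promote the input shape and repeat_times to the same rank by
+    // left-padding the shorter of the two with 1s (numpy-style), so that
+    // out_dims[i] == in_dims[i] * repeat_times[i] holds on every axis.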
+    if (repeat_times.size() < vec_in_dims.size()) {
+      int diff = vec_in_dims.size() - repeat_times.size();
+      repeat_times.insert(repeat_times.begin(), diff, 1);
+    } else {
+      int diff = repeat_times.size() - vec_in_dims.size();
+      vec_in_dims.insert(vec_in_dims.begin(), diff, 1);
+    }
+    PADDLE_ENFORCE_EQ(
+        repeat_times.size(), vec_in_dims.size(),
+        platform::errors::InvalidArgument(
+            "The rank (%d) of the input 'x' and the rank (%d) of the input "
+            "'repeat_times' for tile op must match after promotion.",
+            vec_in_dims.size(), repeat_times.size()));
+
+    auto* out0 = context.Output<Tensor>("Out");
+    bool repeat_one_times = true;
+    for (size_t i = 0; i < repeat_times.size(); ++i) {
+      if (repeat_times[i] != 1) {
+        repeat_one_times = false;
+      }
+    }
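+    // Fast path: if every repeat factor is 1 the output equals the input,
+    // so a plain TensorCopy suffices; otherwise resize the output and let
+    // the broadcast expand the data.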
+    if (repeat_one_times) {
+      paddle::framework::TensorCopy(*in0, context.GetPlace(), out0);
+    } else {
+      framework::DDim new_in_dims = phi::make_ddim(vec_in_dims);
+      framework::DDim out_dims(new_in_dims);
+      for (size_t i = 0; i < repeat_times.size(); ++i) {
+        out_dims[i] *= repeat_times[i];
+      }
+      out0->Resize(out_dims);
+      out0->mutable_data<T>(context.GetPlace());
+      MLUCnnlTensorDesc x_desc(*in0);
+      MLUCnnlTensorDesc out_desc(*out0);
+      MLUCnnl::BroadcastTo(context, x_desc.get(), GetBasePtr(in0),
+                           out_desc.get(), GetBasePtr(out0));
+    }
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+REGISTER_OP_MLU_KERNEL(tile, ops::TileMLUKernel<float>,
+                       ops::TileMLUKernel<int>, ops::TileMLUKernel<int64_t>,
+                       ops::TileMLUKernel<bool>);
+
+#endif
diff --git a/python/paddle/fluid/tests/unittests/mlu/test_tile_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_tile_op_mlu.py
new file mode 100644
index 0000000000..1a2f5dbd40
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/mlu/test_tile_op_mlu.py
@@ -0,0 +1,281 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import sys
+
+sys.path.append("..")
+import unittest
+import numpy as np
+from op_test import OpTest
+import paddle
+import paddle.fluid as fluid
+from paddle.fluid import compiler, Program, program_guard
+
+
+# Situation 1: repeat_times is a list (without tensor)
+class TestTileOpRank1(OpTest):
+
+    def setUp(self):
+        self.op_type = "tile"
+        self.place = paddle.device.MLUPlace(0)
+        self.__class__.use_mlu = True
+        self.init_data()
+        self.inputs = {'X': np.random.random(self.ori_shape).astype("float32")}
+        self.attrs = {'repeat_times': self.repeat_times}
+        output = np.tile(self.inputs['X'], self.repeat_times)
+        self.outputs = {'Out': output}
+
+    def init_data(self):
+        self.ori_shape = [100]
+        self.repeat_times = [2]
+
+    def test_check_output(self):
+        self.check_output_with_place(self.place)
+
+    def test_check_grad(self):
+        self.check_grad(['X'], 'Out')
+
+
+# with dimension expanding
+class TestTileOpRank2Expanding(TestTileOpRank1):
+
+    def init_data(self):
+        self.ori_shape = [120]
+        self.repeat_times = [2, 2]
+
+
+class TestTileOpRank2(TestTileOpRank1):
+
+    def init_data(self):
+        self.ori_shape = [12, 14]
+        self.repeat_times = [2, 3]
+
+
+class TestTileOpRank3_Corner(TestTileOpRank1):
+
+    def init_data(self):
+        self.ori_shape = (2, 10, 5)
+        self.repeat_times = (1, 1, 1)
+
+
+class TestTileOpRank3_Corner2(TestTileOpRank1):
+
+    def init_data(self):
+        self.ori_shape = (2, 10, 5)
+        self.repeat_times = (2, 2)
+
+
+class TestTileOpRank3(TestTileOpRank1):
+
+    def init_data(self):
+        self.ori_shape = (2, 4, 15)
+        self.repeat_times = (2, 1, 4)
+
+
+class TestTileOpRank4(TestTileOpRank1):
+
+    def init_data(self):
+        self.ori_shape = (2, 4, 5, 7)
+        self.repeat_times = (3, 2, 1, 2)
+
+
+# Situation 2: repeat_times is a list (with tensor)
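+# A -1 entry in the 'repeat_times' attr is a placeholder whose actual value
+# is supplied at runtime by the matching 'repeat_times_tensor' input.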
+class TestTileOpRank1_tensor_attr(OpTest):
+
+    def setUp(self):
+        self.op_type = "tile"
+        self.place = paddle.device.MLUPlace(0)
+        self.__class__.use_mlu = True
+        self.init_data()
+        repeat_times_tensor = []
+        for index, ele in enumerate(self.repeat_times):
+            repeat_times_tensor.append(("x" + str(index), np.ones(
+                (1)).astype('int32') * ele))
+
+        self.inputs = {
+            'X': np.random.random(self.ori_shape).astype("float32"),
+            'repeat_times_tensor': repeat_times_tensor,
+        }
+        self.attrs = {"repeat_times": self.infer_repeat_times}
+        output = np.tile(self.inputs['X'], self.repeat_times)
+        self.outputs = {'Out': output}
+
+    def init_data(self):
+        self.ori_shape = [100]
+        self.repeat_times = [2]
+        self.infer_repeat_times = [-1]
+
+    def test_check_output(self):
+        self.check_output_with_place(self.place)
+
+    def test_check_grad(self):
+        self.check_grad(['X'], 'Out')
+
+
+class TestTileOpRank2_Corner_tensor_attr(TestTileOpRank1_tensor_attr):
+
+    def init_data(self):
+        self.ori_shape = [12, 14]
+        self.repeat_times = [1, 1]
+        self.infer_repeat_times = [1, -1]
+
+
+class TestTileOpRank2_attr_tensor(TestTileOpRank1_tensor_attr):
+
+    def init_data(self):
+        self.ori_shape = [12, 14]
+        self.repeat_times = [2, 3]
+        self.infer_repeat_times = [-1, 3]
+
+
+# Situation 3: repeat_times is a tensor
+class TestTileOpRank1_tensor(OpTest):
+
+    def setUp(self):
+        self.op_type = "tile"
+        self.place = paddle.device.MLUPlace(0)
+        self.__class__.use_mlu = True
+        self.init_data()
+        self.inputs = {
+            'X': np.random.random(self.ori_shape).astype("float32"),
+            'RepeatTimes': np.array(self.repeat_times).astype("int32"),
+        }
+        self.attrs = {}
+        output = np.tile(self.inputs['X'], self.repeat_times)
+        self.outputs = {'Out': output}
+
+    def init_data(self):
+        self.ori_shape = [100]
+        self.repeat_times = [2]
+
+    def test_check_output(self):
+        self.check_output_with_place(self.place)
+
+    def test_check_grad(self):
+        self.check_grad(['X'], 'Out')
+
+
+class TestTileOpRank2_tensor(TestTileOpRank1_tensor):
+
+    def init_data(self):
+        self.ori_shape = [12, 14]
+        self.repeat_times = [2, 3]
+
+
+# Situation 4: input x is Integer
+class TestTileOpInteger(OpTest):
+
+    def setUp(self):
+        self.op_type = "tile"
+        self.place = paddle.device.MLUPlace(0)
+        self.__class__.use_mlu = True
+        self.inputs = {
+            'X': np.random.randint(10, size=(4, 4, 5)).astype("int32")
+        }
+        self.attrs = {'repeat_times': [2, 1, 4]}
+        output = np.tile(self.inputs['X'], (2, 1, 4))
+        self.outputs = {'Out': output}
+
+    def test_check_output(self):
+        self.check_output_with_place(self.place)
+
+
+# Situation 5: input x is Bool
+class TestTileOpBoolean(OpTest):
+
+    def setUp(self):
+        self.op_type = "tile"
+        self.place = paddle.device.MLUPlace(0)
+        self.__class__.use_mlu = True
+        self.inputs = {'X': np.random.randint(2, size=(2, 4, 5)).astype("bool")}
+        self.attrs = {'repeat_times': [2, 1, 4]}
+        output = np.tile(self.inputs['X'], (2, 1, 4))
+        self.outputs = {'Out': output}
+
+    def test_check_output(self):
+        self.check_output_with_place(self.place)
+
+
+# Situation 6: input x is Int64
+class TestTileOpInt64_t(OpTest):
+
+    def setUp(self):
+        self.op_type = "tile"
+        self.place = paddle.device.MLUPlace(0)
+        self.__class__.use_mlu = True
+        self.inputs = {
+            'X': np.random.randint(10, size=(2, 4, 5)).astype("int64")
+        }
+        self.attrs = {'repeat_times': [2, 1, 4]}
+        output = np.tile(self.inputs['X'], (2, 1, 4))
+        self.outputs = {'Out': output}
+
+    def test_check_output(self):
+        self.check_output_with_place(self.place)
+
+
+class TestTileError(unittest.TestCase):
+
+    def test_errors(self):
+        with program_guard(Program(), Program()):
+            x1 = fluid.create_lod_tensor(np.array([[-1]]), [[1]],
+                                         fluid.CPUPlace())
+            repeat_times = [2, 2]
+            self.assertRaises(TypeError, paddle.tile, x1, repeat_times)
+            x2 = fluid.layers.data(name='x2', shape=[4], dtype="uint8")
+            self.assertRaises(TypeError, paddle.tile, x2, repeat_times)
+            x3 = fluid.layers.data(name='x3', shape=[4], dtype="bool")
+            x3.stop_gradient = False
+            self.assertRaises(ValueError, paddle.tile, x3, repeat_times)
+
+
+class TestTileAPIStatic(unittest.TestCase):
+
+    def test_api(self):
+        with program_guard(Program(), Program()):
+            repeat_times = [2, 2]
+            x1 = fluid.layers.data(name='x1', shape=[4], dtype="int32")
+            out = paddle.tile(x1, repeat_times)
+            positive_2 = fluid.layers.fill_constant([1], dtype="int32", value=2)
+            out2 = paddle.tile(x1, repeat_times=[positive_2, 2])
+
+
+# Test python API
+class TestTileAPI(unittest.TestCase):
+
+    def test_api(self):
+        with fluid.dygraph.guard():
+            np_x = np.random.random([12, 14]).astype("float32")
+            x = paddle.to_tensor(np_x)
+
+            positive_2 = np.array([2]).astype("int32")
+            positive_2 = paddle.to_tensor(positive_2)
+
+            repeat_times = np.array([2, 3]).astype("int32")
+            repeat_times = paddle.to_tensor(repeat_times)
+
+            out_1 = paddle.tile(x, repeat_times=[2, 3])
+            out_2 = paddle.tile(x, repeat_times=[positive_2, 3])
+            out_3 = paddle.tile(x, repeat_times=repeat_times)
+
+            assert np.array_equal(out_1.numpy(), np.tile(np_x, (2, 3)))
+            assert np.array_equal(out_2.numpy(), np.tile(np_x, (2, 3)))
+            assert np.array_equal(out_3.numpy(), np.tile(np_x, (2, 3)))
+
+
+if __name__ == "__main__":
+    paddle.enable_static()
+    unittest.main()
-- 
GitLab