diff --git a/cmake/external/xpu.cmake b/cmake/external/xpu.cmake
index d12f51c82b2c1cdf2b6c6ae891887d14bf153005..c87d11ad994261ba4bf719086666ab2685709d1b 100644
--- a/cmake/external/xpu.cmake
+++ b/cmake/external/xpu.cmake
@@ -35,8 +35,7 @@ ELSE ()
 ENDIF()
 
 SET(XPU_BASE_URL_WITHOUT_DATE "https://baidu-kunlun-product.cdn.bcebos.com/KL-SDK/klsdk-dev")
-SET(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20211029")
-#SET(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20211020")
+SET(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20211107")
 SET(XPU_XRE_URL "${XPU_BASE_URL}/${XPU_XRE_DIR_NAME}.tar.gz" CACHE STRING "" FORCE)
 SET(XPU_XDNN_URL "${XPU_BASE_URL}/${XPU_XDNN_DIR_NAME}.tar.gz" CACHE STRING "" FORCE)
 SET(XPU_XCCL_URL "${XPU_BASE_URL_WITHOUT_DATE}/20210623/${XPU_XCCL_DIR_NAME}.tar.gz" CACHE STRING "" FORCE)
diff --git a/paddle/fluid/operators/gather_nd_op_xpu.cc b/paddle/fluid/operators/gather_nd_op_xpu.cc
new file mode 100644
index 0000000000000000000000000000000000000000..c7e4169865fa6158934ca9a93d99b488ff9c0286
--- /dev/null
+++ b/paddle/fluid/operators/gather_nd_op_xpu.cc
@@ -0,0 +1,79 @@
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#ifdef PADDLE_WITH_XPU
+
+#include "paddle/fluid/operators/gather_nd_op.h"
+
+namespace paddle {
+namespace operators {
+
+template <typename T>
+class GatherNdXPUKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext &ctx) const override {
+    auto *x = ctx.Input<Tensor>("X");
+    auto *index = ctx.Input<Tensor>("Index");
+    auto *out = ctx.Output<Tensor>("Out");
+
+    out->template mutable_data<T>(ctx.GetPlace());
+    if (x->numel() == 0) return;
+
+    if (index->numel() == 0) {
+      framework::TensorCopy(*x, ctx.GetPlace(), ctx.device_context(), out);
+      return;
+    }
+
+    const auto &index_type = index->type();
+    bool index_type_match = index_type == framework::proto::VarType::INT32 ||
+                            index_type == framework::proto::VarType::INT64;
+    PADDLE_ENFORCE_EQ(index_type_match, true,
+                      platform::errors::InvalidArgument(
+                          "Index holds the wrong type, it holds [%s], "
+                          "but desires to be [%s] or [%s]",
+                          paddle::framework::DataTypeToString(index_type),
+                          paddle::framework::DataTypeToString(
+                              framework::proto::VarType::INT32),
+                          paddle::framework::DataTypeToString(
+                              framework::proto::VarType::INT64)));
+
+    auto x_shape = paddle::framework::vectorize<int>(x->dims());
+    auto index_shape = paddle::framework::vectorize<int>(index->dims());
+    xpu::VectorParam<int> x_vec = {x_shape.data(),
+                                   static_cast<int>(x_shape.size()), nullptr};
+    auto &dev_ctx =
+        ctx.template device_context<platform::XPUDeviceContext>();
+    int ret = XPU_SUCCESS;
+    if (index_type == framework::proto::VarType::INT32) {
+      ret = xpu::gather_nd<T, int>(dev_ctx.x_context(), x->data<T>(),
+                                   index->data<int>(), out->data<T>(), x_vec,
+                                   index_shape);
+    } else {
+      ret = xpu::gather_nd<T, int64_t>(dev_ctx.x_context(), x->data<T>(),
+                                       index->data<int64_t>(), out->data<T>(),
+                                       x_vec, index_shape);
+    }
+    PADDLE_ENFORCE_EQ(ret, XPU_SUCCESS,
+                      platform::errors::External(
+                          "XPU gather_nd kernel return wrong value[%d %s]",
+                          ret, XPUAPIErrorMsg[ret]));
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+REGISTER_OP_XPU_KERNEL(gather_nd, ops::GatherNdXPUKernel<float>,
+                       ops::GatherNdXPUKernel<int>,
+                       ops::GatherNdXPUKernel<int64_t>);
+
+#endif
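
A minimal usage sketch for the new kernel, exercised through the public Python API (illustrative only, not lines from the patch; assumes a PADDLE_WITH_XPU build with an XPU device visible):

    import numpy as np
    import paddle

    paddle.set_device("xpu")  # dispatch to the XPU kernels registered above
    x = paddle.to_tensor(np.arange(6).reshape(3, 2).astype("float32"))
    index = paddle.to_tensor(np.array([[1], [2]], dtype="int32"))
    out = paddle.gather_nd(x, index)  # gathers rows 1 and 2 -> shape [2, 2]
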
diff --git a/paddle/fluid/operators/tile_op.h b/paddle/fluid/operators/tile_op.h
index 260cbc23687313e6c0dd7ad9cab35da2af1dc0e9..5211d72336124ecb9a96d5cd19495f3d5ad6f03f 100644
--- a/paddle/fluid/operators/tile_op.h
+++ b/paddle/fluid/operators/tile_op.h
@@ -33,6 +33,7 @@ inline std::vector<int> get_repeat_times(
     auto* repeat_data = repeat_tensor->data<int>();
     framework::Tensor cpu_repeat_tensor;
     if (platform::is_gpu_place(repeat_tensor->place()) ||
+        platform::is_xpu_place(repeat_tensor->place()) ||
         platform::is_npu_place(repeat_tensor->place())) {
       TensorCopySync(*repeat_tensor, platform::CPUPlace(), &cpu_repeat_tensor);
       repeat_data = cpu_repeat_tensor.data<int>();
@@ -50,6 +51,7 @@ inline std::vector<int> get_repeat_times(
   for (size_t i = 0; i < list_repeat_times_tensor.size(); ++i) {
     auto tensor = list_repeat_times_tensor[i];
     if (platform::is_gpu_place(tensor->place()) ||
+        platform::is_xpu_place(tensor->place()) ||
         platform::is_npu_place(tensor->place())) {
       framework::Tensor temp;
       TensorCopySync(*tensor, platform::CPUPlace(), &temp);
diff --git a/paddle/fluid/operators/tile_op_xpu.cc b/paddle/fluid/operators/tile_op_xpu.cc
new file mode 100644
index 0000000000000000000000000000000000000000..94b0e465cfedb09cb1c53584b0759dc7d2b5c296
--- /dev/null
+++ b/paddle/fluid/operators/tile_op_xpu.cc
@@ -0,0 +1,119 @@
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#ifdef PADDLE_WITH_XPU
+
+#include "paddle/fluid/operators/tile_op.h"
+
+namespace paddle {
+namespace operators {
+
+template <typename T>
+class TileXPUKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& context) const override {
+    auto rank = context.Input<Tensor>("X")->dims().size();
+    PADDLE_ENFORCE_GE(
+        rank, 1, platform::errors::InvalidArgument(
+                     "The rank of the input 'x' for tile op must be a positive "
+                     "integer, but the value received is %d.",
+                     rank));
+    PADDLE_ENFORCE_LE(
+        rank, MAX_RANK_SUPPORTED,
+        platform::errors::InvalidArgument(
+            "The rank of the input 'x' for tile op "
+            "must be less than or equal to %d, but the value received is %d.",
+            MAX_RANK_SUPPORTED, rank));
+    auto repeat_times = get_repeat_times(context);
+    int repeat_times_size = repeat_times.size();
+    PADDLE_ENFORCE_GE(
+        repeat_times_size, 1,
+        platform::errors::InvalidArgument(
+            "The number of elements of the input 'repeat_times' for tile "
+            "op must be positive, but the value received is %d.",
+            repeat_times_size));
+    PADDLE_ENFORCE_LE(
+        repeat_times_size, MAX_RANK_SUPPORTED,
+        platform::errors::InvalidArgument(
+            "The number of elements of the input 'repeat_times' for tile op "
+            "must be less than or equal to %d, but the value received is %d.",
+            MAX_RANK_SUPPORTED, repeat_times_size));
+
+    auto* in0 = context.Input<Tensor>("X");
+    auto in_dims = in0->dims();
+    for (size_t i = 0; i < repeat_times.size(); ++i) {
+      PADDLE_ENFORCE_GT(
+          repeat_times[i], 0,
+          platform::errors::InvalidArgument(
+              "All elements of the input 'repeat_times' for tile op must "
+              "be positive integers, but the value received is %d.",
+              repeat_times[i]));
+    }
+    auto vec_in_dims = framework::vectorize<int>(in_dims);
+    if (repeat_times.size() < vec_in_dims.size()) {
+      int diff = vec_in_dims.size() - repeat_times.size();
+      repeat_times.insert(repeat_times.begin(), diff, 1);
+    } else {
+      int diff = repeat_times.size() - vec_in_dims.size();
+      vec_in_dims.insert(vec_in_dims.begin(), diff, 1);
+    }
+    PADDLE_ENFORCE_EQ(
+        repeat_times.size(), vec_in_dims.size(),
+        platform::errors::InvalidArgument(
+            "The rank (%d) of the input 'x' and the rank (%d) of the input "
+            "'repeat_times' for tile op must match after promotion.",
+            vec_in_dims.size(), repeat_times.size()));
+
+    auto* out0 = context.Output<Tensor>("Out");
+    framework::DDim new_in_dims = framework::make_ddim(vec_in_dims);
+    framework::DDim out_dims(new_in_dims);
+
+    for (size_t i = 0; i < repeat_times.size(); ++i) {
+      out_dims[i] *= repeat_times[i];
+    }
+    auto vec_out_dims = framework::vectorize<int>(out_dims);
+    out0->Resize(out_dims);
+    out0->mutable_data<T>(context.GetPlace());
+
+    auto& dev_ctx =
+        context.template device_context<platform::XPUDeviceContext>();
+    std::vector<int> temp(repeat_times.size(), 1);
+    if (repeat_times == temp) {
+      framework::TensorCopy(*in0, context.GetPlace(), dev_ctx, out0);
+      return;
+    }
+
+    int ret = XPU_SUCCESS;
+    if (std::is_same<T, bool>::value) {
+      ret = xpu::broadcast<int8_t>(
+          dev_ctx.x_context(), reinterpret_cast<const int8_t*>(in0->data<T>()),
+          reinterpret_cast<int8_t*>(out0->data<T>()), vec_in_dims,
+          vec_out_dims);
+
+    } else {
+      ret = xpu::broadcast<T>(dev_ctx.x_context(), in0->data<T>(),
+                              out0->data<T>(), vec_in_dims, vec_out_dims);
+    }
+    PADDLE_ENFORCE_EQ(
+        ret, XPU_SUCCESS,
+        platform::errors::External("XPU tile kernel return wrong value[%d %s]",
+                                   ret, XPUAPIErrorMsg[ret]));
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+REGISTER_OP_XPU_KERNEL(tile, ops::TileXPUKernel<bool>,
+                       ops::TileXPUKernel<float>, ops::TileXPUKernel<int>,
+                       ops::TileXPUKernel<int64_t>);
+
+#endif
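
A matching sketch for the tile kernel above, under the same assumptions. Note that an all-ones repeat_times takes the TensorCopy fast path inside the kernel rather than calling xpu::broadcast:

    import numpy as np
    import paddle

    paddle.set_device("xpu")
    x = paddle.to_tensor(np.random.rand(2, 3).astype("float32"))
    out = paddle.tile(x, repeat_times=[2, 1])  # -> shape [4, 3]
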
diff --git a/paddle/fluid/platform/xpu/xpu2_op_list.h b/paddle/fluid/platform/xpu/xpu2_op_list.h
index d1a3bb5dd3c257acb66eed320358f95e892d1b95..5eb86a36f5167d0e799bd9b42a83b75c4ff4f371 100644
--- a/paddle/fluid/platform/xpu/xpu2_op_list.h
+++ b/paddle/fluid/platform/xpu/xpu2_op_list.h
@@ -252,8 +252,16 @@ XPUOpMap& get_kl2_ops() {
           pOpKernelType(vartype::COMPLEX128, XPUPlace())})},
       {"softmax", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()),
                                 pOpKernelType(vartype::FP16, XPUPlace())})},
-      {"softmax_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()),
-                                     pOpKernelType(vartype::FP16, XPUPlace())})}
+      {"softmax_grad",
+       XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()),
+                     pOpKernelType(vartype::FP16, XPUPlace())})},
+      {"gather_nd", XPUKernelSet({pOpKernelType(vartype::INT32, XPUPlace()),
+                                  pOpKernelType(vartype::INT64, XPUPlace()),
+                                  pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"tile", XPUKernelSet({pOpKernelType(vartype::INT32, XPUPlace()),
+                             pOpKernelType(vartype::INT64, XPUPlace()),
+                             pOpKernelType(vartype::BOOL, XPUPlace()),
+                             pOpKernelType(vartype::FP32, XPUPlace())})}
 
       // AddMore
 };
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_gather_nd_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_gather_nd_op_xpu.py
new file mode 100644
index 0000000000000000000000000000000000000000..0f9751cec4d9286a46df00b174f4dfb9e21d5076
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/xpu/test_gather_nd_op_xpu.py
@@ -0,0 +1,268 @@
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import unittest
+import numpy as np
+import sys
+sys.path.append("..")
+from op_test import OpTest
+from op_test_xpu import XPUOpTest
+import paddle.fluid as fluid
+import paddle
+
+
+def gather_nd_grad(x, index):
+    dout_shape = index.shape[:-1] + x.shape[index.shape[-1]:]
+    numel = 1
+    for i in dout_shape:
+        numel = numel * i
+    dout = np.full(dout_shape, 1. / numel)
+    dx = np.full_like(x, 0)
+
+    index = tuple(index.reshape(-1, index.shape[-1]).T)
+    np.add.at(dx, index, dout)
+
+    return dx
+
+
+def test_class1(op_type, typename):
+    class TestGatherNdOpWithEmptyIndex(XPUOpTest):
+        #Index has empty element, which means copy entire tensor
+
+        def setUp(self):
+            self.set_xpu()
+            self.place = paddle.XPUPlace(0)
+            self.op_type = "gather_nd"
+            xnp = np.random.random((5, 20)).astype(typename)
+            self.inputs = {
+                'X': xnp,
+                'Index': np.array([[], []]).astype("int32")
+            }
+            self.outputs = {
+                'Out': np.vstack((xnp[np.newaxis, :], xnp[np.newaxis, :]))
+            }
+
+        def set_xpu(self):
+            self.__class__.use_xpu = True
+
+        def test_check_output(self):
+            self.check_output_with_place(self.place)
+
+        def test_check_grad(self):
+            pass
+
+    cls_name = "{0}_{1}_1".format(op_type, typename)
+    TestGatherNdOpWithEmptyIndex.__name__ = cls_name
+    globals()[cls_name] = TestGatherNdOpWithEmptyIndex
+
+
+def test_class2(op_type, typename):
+    class TestGatherNdOpWithIndex1(OpTest):
+        def setUp(self):
+            self.set_xpu()
+            self.place = paddle.XPUPlace(0)
+            self.op_type = "gather_nd"
+            xnp = np.random.random((5, 20)).astype(typename)
+            self.inputs = {'X': xnp, 'Index': np.array([1]).astype("int32")}
+            self.outputs = {'Out': self.inputs["X"][self.inputs["Index"]]}
+
+        def set_xpu(self):
+            self.__class__.use_xpu = True
+
+        def test_check_output(self):
+            self.check_output_with_place(self.place)
+
+        def test_check_grad(self):
+            pass
+
+    cls_name = "{0}_{1}_2".format(op_type, typename)
+    TestGatherNdOpWithIndex1.__name__ = cls_name
+    globals()[cls_name] = TestGatherNdOpWithIndex1
+
+
+def test_class3(op_type, typename):
+    class TestGatherNdOpWithLowIndex(OpTest):
+        #Index has low rank, X has high rank
+
+        def setUp(self):
+            self.set_xpu()
+            self.place = paddle.XPUPlace(0)
+            self.op_type = "gather_nd"
+            xnp = np.random.uniform(0, 100, (10, 10)).astype(typename)
+            index = np.array([[1], [2]]).astype("int64")
+
+            self.inputs = {'X': xnp, 'Index': index}
+            self.outputs = {'Out': xnp[tuple(index.T)]}
+            self.x_grad = gather_nd_grad(xnp, index)
+
+        def set_xpu(self):
+            self.__class__.use_xpu = True
+
+        def test_check_output(self):
+            self.check_output_with_place(self.place)
+
+        def test_check_grad(self):
+            pass
+
+    cls_name = "{0}_{1}_3".format(op_type, typename)
+    TestGatherNdOpWithLowIndex.__name__ = cls_name
+    globals()[cls_name] = TestGatherNdOpWithLowIndex
+
+
+def test_class4(op_type, typename):
+    class TestGatherNdOpIndex1(OpTest):
+        #Index has low rank, X has high rank
+
+        def setUp(self):
+            self.set_xpu()
+            self.place = paddle.XPUPlace(0)
+            self.op_type = "gather_nd"
+            xnp = np.random.uniform(0, 100, (10, 10)).astype(typename)
+            index = np.array([1, 2]).astype("int64")
+
+            self.inputs = {'X': xnp, 'Index': index}
+
+            self.outputs = {'Out': xnp[tuple(index.T)]}
+
+        def set_xpu(self):
+            self.__class__.use_xpu = True
+
+        def test_check_output(self):
+            self.check_output_with_place(self.place)
+
+        def test_check_grad(self):
+            pass
+
+    cls_name = "{0}_{1}_4".format(op_type, typename)
+    TestGatherNdOpIndex1.__name__ = cls_name
+    globals()[cls_name] = TestGatherNdOpIndex1
+
+
+def test_class5(op_type, typename):
+    class TestGatherNdOpWithSameIndexAsX(OpTest):
+        #Index has same rank as X's rank
+
+        def setUp(self):
+            self.set_xpu()
+            self.place = paddle.XPUPlace(0)
+            self.op_type = "gather_nd"
+            xnp = np.random.uniform(0, 100, (10, 10)).astype(typename)
+            index = np.array([[1, 1], [2, 1]]).astype("int64")
+
+            self.inputs = {'X': xnp, 'Index': index}
+            self.outputs = {'Out': xnp[tuple(index.T)]}  #[25, 22]
+
+        def set_xpu(self):
+            self.__class__.use_xpu = True
+
+        def test_check_output(self):
+            self.check_output_with_place(self.place)
+
+        def test_check_grad(self):
+            pass
+
+    cls_name = "{0}_{1}_5".format(op_type, typename)
+    TestGatherNdOpWithSameIndexAsX.__name__ = cls_name
+    globals()[cls_name] = TestGatherNdOpWithSameIndexAsX
+
+
+def test_class6(op_type, typename):
+    class TestGatherNdOpWithHighRankSame(OpTest):
+        #Both Index and X have high rank, and Rank(Index) = Rank(X)
+
+        def setUp(self):
+            self.set_xpu()
+            self.place = paddle.XPUPlace(0)
+            self.op_type = "gather_nd"
+            shape = (5, 2, 3, 1, 10)
+            xnp = np.random.rand(*shape).astype(typename)
+            index = np.vstack([np.random.randint(
+                0, s, size=2) for s in shape]).T
+
+            self.inputs = {'X': xnp, 'Index': index.astype("int32")}
+            self.outputs = {'Out': xnp[tuple(index.T)]}
+
+        def set_xpu(self):
+            self.__class__.use_xpu = True
+
+        def test_check_output(self):
+            self.check_output_with_place(self.place)
+
+        def test_check_grad(self):
+            pass
+
+    cls_name = "{0}_{1}_6".format(op_type, typename)
+    TestGatherNdOpWithHighRankSame.__name__ = cls_name
+    globals()[cls_name] = TestGatherNdOpWithHighRankSame
+
+
+def test_class7(op_type, typename):
+    class TestGatherNdOpWithHighRankDiff(OpTest):
+        #Both Index and X have high rank, Rank(Index) < Rank(X)
+
+        def setUp(self):
+            self.set_xpu()
+            self.place = paddle.XPUPlace(0)
+            self.op_type = "gather_nd"
+            shape = (2, 3, 4, 1, 10)
+            xnp = np.random.rand(*shape).astype(typename)
+            index = np.vstack(
+                [np.random.randint(
+                    0, s, size=200) for s in shape]).T
+            index_re = index.reshape([20, 5, 2, 5])
+
+            self.inputs = {'X': xnp, 'Index': index_re.astype("int32")}
+            self.outputs = {'Out': xnp[tuple(index.T)].reshape([20, 5, 2])}
+
+        def set_xpu(self):
+            self.__class__.use_xpu = True
+
+        def test_check_output(self):
+            self.check_output_with_place(self.place)
+
+        def test_check_grad(self):
+            pass
+
+    cls_name = "{0}_{1}_7".format(op_type, typename)
+    TestGatherNdOpWithHighRankDiff.__name__ = cls_name
+    globals()[cls_name] = TestGatherNdOpWithHighRankDiff
+
+
+class TestGatherNdAPI(unittest.TestCase):
+    def test_imperative(self):
+        paddle.disable_static()
+        input_1 = np.array([[1, 2], [3, 4], [5, 6]])
+        index_1 = np.array([[1]])
+        input = fluid.dygraph.to_variable(input_1)
+        index = fluid.dygraph.to_variable(index_1)
+        output = paddle.fluid.layers.gather(input, index)
+        output_np = output.numpy()
+        expected_output = np.array([3, 4])
+        self.assertTrue(np.allclose(output_np, expected_output))
+        paddle.enable_static()
+
+
+for _typename in {'float32', 'int', 'int64'}:
+    test_class1('gather_nd', _typename)
+    test_class2('gather_nd', _typename)
+    test_class3('gather_nd', _typename)
+    test_class4('gather_nd', _typename)
+    test_class5('gather_nd', _typename)
+    test_class6('gather_nd', _typename)
+    test_class7('gather_nd', _typename)
+
+if __name__ == "__main__":
+    unittest.main()
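
As a quick sanity check of the gather_nd_grad helper defined at the top of the test file above, a NumPy-only sketch (no XPU required; gather_nd_grad refers to that helper):

    import numpy as np

    x = np.random.rand(10, 10).astype("float32")
    index = np.array([[1], [2]], dtype=np.int64)
    dx = gather_nd_grad(x, index)  # dout has shape (2, 10), filled with 1/20
    assert dx.shape == x.shape     # rows 1 and 2 hold 1/20, all other rows stay zero
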
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_tile_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_tile_op_xpu.py
new file mode 100644
index 0000000000000000000000000000000000000000..d010e1633578ed6f4a237dbda2641b1b563633ee
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/xpu/test_tile_op_xpu.py
@@ -0,0 +1,267 @@
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import unittest
+import numpy as np
+import sys
+sys.path.append("..")
+from op_test import OpTest
+from op_test_xpu import XPUOpTest
+import paddle
+import paddle.fluid as fluid
+from paddle.fluid import compiler, Program, program_guard
+from paddle.fluid import core
+
+paddle.enable_static()
+np.random.seed(10)
+
+
+#Situation 1: repeat_times is a list (without tensor)
+class TestTileOpRank1(XPUOpTest):
+    def setUp(self):
+        self.set_xpu()
+        self.place = paddle.XPUPlace(0)
+        self.op_type = "tile"
+        self.init_data()
+
+        self.inputs = {'X': np.random.random(self.ori_shape).astype("float32")}
+        self.attrs = {'repeat_times': self.repeat_times}
+        output = np.tile(self.inputs['X'], self.repeat_times)
+        self.outputs = {'Out': output}
+
+    def set_xpu(self):
+        self.__class__.use_xpu = True
+
+    def init_data(self):
+        self.ori_shape = [100]
+        self.repeat_times = [2]
+
+    def test_check_output(self):
+        self.check_output_with_place(self.place)
+
+    def test_check_grad(self):
+        pass
+
+
+#with dimension expanding
+class TestTileOpRank2Expanding(TestTileOpRank1):
+    def init_data(self):
+        self.ori_shape = [120]
+        self.repeat_times = [2, 2]
+
+
+class TestTileOpRank2(TestTileOpRank1):
+    def init_data(self):
+        self.ori_shape = [12, 14]
+        self.repeat_times = [2, 3]
+
+
+class TestTileOpRank3_Corner(TestTileOpRank1):
+    def init_data(self):
+        self.ori_shape = (2, 10, 5)
+        self.repeat_times = (1, 1, 1)
+
+
+class TestTileOpRank3_Corner2(TestTileOpRank1):
+    def init_data(self):
+        self.ori_shape = (2, 10, 5)
+        self.repeat_times = (2, 2)
+
+
+class TestTileOpRank3(TestTileOpRank1):
+    def init_data(self):
+        self.ori_shape = (2, 4, 15)
+        self.repeat_times = (2, 1, 4)
+
+
+class TestTileOpRank4(TestTileOpRank1):
+    def init_data(self):
+        self.ori_shape = (2, 4, 5, 7)
+        self.repeat_times = (3, 2, 1, 2)
+
+
+# Situation 2: repeat_times is a list (with tensor)
+class TestTileOpRank1_tensor_attr(XPUOpTest):
+    def setUp(self):
+        self.set_xpu()
+        self.place = paddle.XPUPlace(0)
+        self.op_type = "tile"
+        self.init_data()
+        repeat_times_tensor = []
+        for index, ele in enumerate(self.repeat_times):
+            repeat_times_tensor.append(("x" + str(index), np.ones(
+                (1)).astype('int32') * ele))
+
+        self.inputs = {
+            'X': np.random.random(self.ori_shape).astype("float32"),
+            'repeat_times_tensor': repeat_times_tensor,
+        }
+        self.attrs = {"repeat_times": self.infer_repeat_times}
+        output = np.tile(self.inputs['X'], self.repeat_times)
+        self.outputs = {'Out': output}
+
+    def set_xpu(self):
+        self.__class__.use_xpu = True
+
+    def init_data(self):
+        self.ori_shape = [100]
+        self.repeat_times = [2]
+        self.infer_repeat_times = [-1]
+
+    def test_check_output(self):
+        self.check_output_with_place(self.place)
+
+    def test_check_grad(self):
+        pass
+
+
+class TestTileOpRank2_Corner_tensor_attr(TestTileOpRank1_tensor_attr):
+    def init_data(self):
+        self.ori_shape = [12, 14]
+        self.repeat_times = [1, 1]
+        self.infer_repeat_times = [1, -1]
+
+
+class TestTileOpRank2_attr_tensor(TestTileOpRank1_tensor_attr):
+    def init_data(self):
+        self.ori_shape = [12, 14]
+        self.repeat_times = [2, 3]
+        self.infer_repeat_times = [-1, 3]
+
+
+# Situation 3: repeat_times is a tensor
+class TestTileOpRank1_tensor(XPUOpTest):
+    def setUp(self):
+        self.set_xpu()
+        self.place = paddle.XPUPlace(0)
+        self.op_type = "tile"
+        self.init_data()
+
+        self.inputs = {
+            'X': np.random.random(self.ori_shape).astype("float32"),
+            'RepeatTimes': np.array(self.repeat_times).astype("int32"),
+        }
+        self.attrs = {}
+        output = np.tile(self.inputs['X'], self.repeat_times)
+        self.outputs = {'Out': output}
+
+    def set_xpu(self):
+        self.__class__.use_xpu = True
+
+    def init_data(self):
+        self.ori_shape = [100]
+        self.repeat_times = [2]
+
+    def test_check_output(self):
+        self.check_output_with_place(self.place)
+
+    def test_check_grad(self):
+        pass
+
+
+class TestTileOpRank2_tensor(TestTileOpRank1_tensor):
+    def init_data(self):
+        self.ori_shape = [12, 14]
+        self.repeat_times = [2, 3]
+
+
+# Situation 4: input x is int32
+class TestTileOpInteger(XPUOpTest):
+    def setUp(self):
+        self.set_xpu()
+        self.place = paddle.XPUPlace(0)
+        self.op_type = "tile"
+        self.inputs = {
+            'X': np.random.randint(
+                10, size=(4, 4, 5)).astype("int32")
+        }
+        self.attrs = {'repeat_times': [2, 1, 4]}
+        output = np.tile(self.inputs['X'], (2, 1, 4))
+        self.outputs = {'Out': output}
+
+    def set_xpu(self):
+        self.__class__.use_xpu = True
+
+    def test_check_output(self):
+        self.check_output_with_place(self.place)
+
+
+# Situation 5: input x is int64
+class TestTileOpInt64_t(XPUOpTest):
+    def setUp(self):
+        self.set_xpu()
+        self.place = paddle.XPUPlace(0)
+        self.op_type = "tile"
+        self.inputs = {
+            'X': np.random.randint(
+                10, size=(2, 4, 5)).astype("int64")
+        }
+        self.attrs = {'repeat_times': [2, 1, 4]}
+        output = np.tile(self.inputs['X'], (2, 1, 4))
+        self.outputs = {'Out': output}
+
+    def set_xpu(self):
+        self.__class__.use_xpu = True
+
+    def test_check_output(self):
+        self.check_output_with_place(self.place)
+
+
+# Situation 6: input x is Bool
+class TestTileOpBool(XPUOpTest):
+    def setUp(self):
+        self.set_xpu()
+        self.place = paddle.XPUPlace(0)
+        self.op_type = "tile"
+        self.inputs = {
+            'X': np.random.randint(
+                10, size=(2, 4, 5)).astype("bool")
+        }
+        self.attrs = {'repeat_times': [2, 1, 4]}
+        output = np.tile(self.inputs['X'], (2, 1, 4))
+        self.outputs = {'Out': output}
+
+    def set_xpu(self):
+        self.__class__.use_xpu = True
+
+    def test_check_output(self):
+        self.check_output_with_place(self.place)
+
+
+# Test python API
+class TestTileAPI(unittest.TestCase):
+    def test_api(self):
+        with fluid.dygraph.guard(paddle.XPUPlace(0)):
+            np_x = np.random.random([12, 14]).astype("float32")
+            x = paddle.to_tensor(np_x)
+
+            positive_2 = np.array([2]).astype("int32")
+            positive_2 = paddle.to_tensor(positive_2)
+
+            repeat_times = np.array([2, 3]).astype("int32")
+            repeat_times = paddle.to_tensor(repeat_times)
+
+            out_1 = paddle.tile(x, repeat_times=[2, 3])
+            out_2 = paddle.tile(x, repeat_times=[positive_2, 3])
+            out_3 = paddle.tile(x, repeat_times=repeat_times)
+
+            assert np.array_equal(out_1.numpy(), np.tile(np_x, (2, 3)))
+            assert np.array_equal(out_2.numpy(), np.tile(np_x, (2, 3)))
+            assert np.array_equal(out_3.numpy(), np.tile(np_x, (2, 3)))
+
+
+if __name__ == "__main__":
+    unittest.main()
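
Finally, a NumPy-only sketch of why tile_op_xpu.cc can route bool tensors through the int8_t instantiation of xpu::broadcast: both types are one byte wide, so tiling the raw bytes preserves the boolean values:

    import numpy as np

    x = np.random.randint(0, 2, size=(2, 3)).astype("bool")
    via_int8 = np.tile(x.view(np.int8), (2, 1)).view(np.bool_)
    assert np.array_equal(via_int8, np.tile(x, (2, 1)))
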