diff --git a/cmake/external/xpu.cmake b/cmake/external/xpu.cmake index 0c19a17fd00533dcdf4ae0d26ce4a755e7b3399c..7f828fd66e2aad56f66658ba1d91797f5e64de35 100644 --- a/cmake/external/xpu.cmake +++ b/cmake/external/xpu.cmake @@ -35,7 +35,7 @@ ELSE () ENDIF() SET(XPU_BASE_URL_WITHOUT_DATE "https://baidu-kunlun-product.cdn.bcebos.com/KL-SDK/klsdk-dev") -SET(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20211107") +SET(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20211129") SET(XPU_XRE_URL "${XPU_BASE_URL}/${XPU_XRE_DIR_NAME}.tar.gz" CACHE STRING "" FORCE) SET(XPU_XDNN_URL "${XPU_BASE_URL}/${XPU_XDNN_DIR_NAME}.tar.gz" CACHE STRING "" FORCE) SET(XPU_XCCL_URL "${XPU_BASE_URL_WITHOUT_DATE}/20210623/${XPU_XCCL_DIR_NAME}.tar.gz" CACHE STRING "" FORCE) diff --git a/paddle/fluid/operators/expand_as_v2_op_xpu.cc b/paddle/fluid/operators/expand_as_v2_op_xpu.cc new file mode 100644 index 0000000000000000000000000000000000000000..755ee685449b169dfaca6844ac69e823d9c96ea3 --- /dev/null +++ b/paddle/fluid/operators/expand_as_v2_op_xpu.cc @@ -0,0 +1,111 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
/* XPU kernel for the expand_as_v2 operator. */

#ifdef PADDLE_WITH_XPU

#include "paddle/fluid/operators/expand_as_v2_op.h"

namespace paddle {
namespace operators {

// Forward kernel of expand_as_v2 on XPU: broadcasts input 'X' to the shape
// given by the 'target_shape' attribute through xpu::broadcast.
template <typename T>
class ExpandAsV2XPUKernel : public framework::OpKernel<T> {
  using XPUType = typename XPUTypeTrait<T>::Type;

 public:
  // Checks the rank constraints (1 <= rank(X) <= rank(target) <=
  // MAX_RANK_SUPPORTED) and then delegates the real work to ExpandAs().
  void Compute(const framework::ExecutionContext& context) const override {
    auto rank = context.Input<framework::Tensor>("X")->dims().size();
    auto target_rank =
        context.Attr<std::vector<int>>("target_shape").size();

    PADDLE_ENFORCE_GE(target_rank, rank,
                      platform::errors::InvalidArgument(
                          "The rank (%d) of the input 'target_tensor' for "
                          "expand_as_v2 op must be greater than or equal to "
                          "the rank (%d) of the input 'x'.",
                          target_rank, rank));
    PADDLE_ENFORCE_GE(rank, 1, platform::errors::InvalidArgument(
                                   "The rank (%d) of the input 'x' for "
                                   "expand_as_v2 op must be positive.",
                                   rank));
    PADDLE_ENFORCE_LE(target_rank, MAX_RANK_SUPPORTED,
                      platform::errors::InvalidArgument(
                          "The rank (%d) of the input 'target_tensor' for "
                          "expand_as_v2 op must be less than or equal to %d.",
                          target_rank, MAX_RANK_SUPPORTED));
    ExpandAs(context);
  }

 protected:
  // Left-pads the shape of 'X' with ones up to the target rank, verifies that
  // every non-singleton extent equals the target extent, resizes 'Out' and
  // runs the device broadcast.
  void ExpandAs(const framework::ExecutionContext& context) const {
    auto* src = context.Input<framework::Tensor>("X");
    auto target_shape = context.Attr<std::vector<int>>("target_shape");

    auto padded_src_shape = framework::vectorize<int>(src->dims());
    auto missing_dims = target_shape.size() - padded_src_shape.size();
    padded_src_shape.insert(padded_src_shape.begin(), missing_dims, 1);

    for (size_t axis = 0; axis < padded_src_shape.size(); ++axis) {
      PADDLE_ENFORCE_NE(target_shape[axis], 0,
                        platform::errors::InvalidArgument(
                            "The value of target shape cannot be zero."));
      if (padded_src_shape[axis] == 1) {
        continue;  // singleton axes may be expanded to any extent
      }
      PADDLE_ENFORCE_EQ(
          padded_src_shape[axis], target_shape[axis],
          platform::errors::InvalidArgument(
              "The value (%d) of the non-singleton dimension does not match"
              " the corresponding value (%d) in "
              "target tensor for expand_as_v2 op.",
              padded_src_shape[axis], target_shape[axis]));
    }

    auto* dst = context.Output<framework::Tensor>("Out");
    framework::DDim dst_dims = framework::make_ddim(target_shape);
    dst->Resize(dst_dims);
    dst->mutable_data<T>(context.GetPlace());
    auto dst_shape = framework::vectorize<int>(dst_dims);

    const auto& dev_ctx =
        context.template device_context<paddle::platform::XPUDeviceContext>();

    // bool tensors take a dedicated one-byte code path; every other dtype is
    // broadcast as its XPUType.
    int r = XPU_SUCCESS;
    if (std::is_same<T, bool>::value) {
      r = xpu::broadcast<int8_t>(
          dev_ctx.x_context(),
          reinterpret_cast<const int8_t*>(src->data<T>()),
          reinterpret_cast<int8_t*>(dst->data<T>()), padded_src_shape,
          dst_shape);
    } else {
      r = xpu::broadcast<XPUType>(
          dev_ctx.x_context(),
          reinterpret_cast<const XPUType*>(src->data<T>()),
          reinterpret_cast<XPUType*>(dst->data<T>()), padded_src_shape,
          dst_shape);
    }
    PADDLE_ENFORCE_EQ(
        r, XPU_SUCCESS,
        platform::errors::External("XPU API(broadcast) return wrong "
                                   "value[%d %s] in ExpandAsV2XPUKernel.",
                                   r, XPUAPIErrorMsg[r]));
  }
};

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;
REGISTER_OP_XPU_KERNEL(expand_as_v2, ops::ExpandAsV2XPUKernel<float>,
                       ops::ExpandAsV2XPUKernel<paddle::platform::float16>,
                       ops::ExpandAsV2XPUKernel<bool>,
                       ops::ExpandAsV2XPUKernel<int>,
                       ops::ExpandAsV2XPUKernel<int64_t>);

#endif
TensorCopySync(*tensor, platform::CPUPlace(), &temp); vec_epxand_shape.push_back(*temp.data()); } +#endif +#ifdef PADDLE_WITH_XPU + else if (platform::is_xpu_place(tensor->place())) { // NOLINT + framework::Tensor temp; + TensorCopySync(*tensor, platform::CPUPlace(), &temp); + vec_epxand_shape.push_back(*temp.data()); + } #endif else { // NOLINT vec_epxand_shape.push_back(*tensor->data()); diff --git a/paddle/fluid/operators/expand_v2_op_xpu.cc b/paddle/fluid/operators/expand_v2_op_xpu.cc new file mode 100644 index 0000000000000000000000000000000000000000..791f8e823655c53cbd984726be08940c2933b7c8 --- /dev/null +++ b/paddle/fluid/operators/expand_v2_op_xpu.cc @@ -0,0 +1,129 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
/* XPU kernel for the expand_v2 operator. */

#ifdef PADDLE_WITH_XPU

#include "paddle/fluid/operators/expand_v2_op.h"

namespace paddle {
namespace operators {

// Forward kernel of expand_v2 on XPU.
//
// The requested shape comes from get_expand_shape(context). Input 'X' is
// left-padded with ones up to that rank; for every axis the output extent is
//   * the requested value, when it is positive (the input extent must then be
//     1 or equal to it), or
//   * the input extent, when the requested value is -1 (only allowed for axes
//     that exist in 'X').
// The data movement itself is done by xpu::broadcast.
template <typename T>
class ExpandV2XPUKernel : public framework::OpKernel<T> {
  using XPUType = typename XPUTypeTrait<T>::Type;

 public:
  void Compute(const framework::ExecutionContext& context) const override {
    auto* X = context.Input<framework::Tensor>("X");
    auto* Out = context.Output<framework::Tensor>("Out");

    auto in_dims = X->dims();
    auto expand_shape = get_expand_shape(context);

    // Validate the ranks before doing any arithmetic on them: the padding
    // count below is an unsigned difference and would wrap around if 'shape'
    // had fewer entries than 'X' has dimensions.
    auto rank = in_dims.size();
    PADDLE_ENFORCE_GE(
        rank, 1,
        platform::errors::InvalidArgument(
            "The rank of the input 'X' for expand_v2 op must be positive, "
            "but the value received is %d.",
            rank));
    auto shape_size = expand_shape.size();
    PADDLE_ENFORCE_GE(
        shape_size, rank,
        platform::errors::InvalidArgument(
            "The number (%d) of elements of 'shape' for expand_v2 op must "
            "be "
            "greater than or equal to the rank (%d) of the input 'X'.",
            shape_size, rank));

    auto vec_in_dims = framework::vectorize<int>(in_dims);
    auto diff = shape_size - vec_in_dims.size();
    vec_in_dims.insert(vec_in_dims.begin(), diff, 1);

    std::vector<int> final_expand_shape(vec_in_dims.size());
    for (size_t i = 0; i < vec_in_dims.size(); ++i) {
      PADDLE_ENFORCE_NE(expand_shape[i], 0,
                        platform::errors::InvalidArgument(
                            "The expanded size cannot be zero."));
      if (i < diff) {
        // Axis does not exist in X, e.g. expand_shape = [3,4,-1,-1],
        // X = [10,2] --> final_expand_shape = [3,4,10,2].
        PADDLE_ENFORCE_GT(
            expand_shape[i], 0,
            platform::errors::InvalidArgument(
                "The expanded size (%d) for non-existing dimensions must be "
                "positive for expand_v2 op.",
                expand_shape[i]));
        final_expand_shape[i] = expand_shape[i];
      } else if (expand_shape[i] > 0) {
        // Explicit extent, e.g. expand_shape = [3,4,10,4], X = [10,1]
        // --> final_expand_shape = [3,4,10,4].
        if (vec_in_dims[i] != 1) {
          PADDLE_ENFORCE_EQ(
              vec_in_dims[i], expand_shape[i],
              platform::errors::InvalidArgument(
                  "The value (%d) of the non-singleton dimension does not match"
                  " the corresponding value (%d) in shape for expand_v2 op.",
                  vec_in_dims[i], expand_shape[i]));
        }
        final_expand_shape[i] = expand_shape[i];
      } else {
        // -1 keeps the input extent, e.g. expand_shape = [3,4,-1,-1],
        // X = [10,2] --> final_expand_shape = [3,4,10,2].
        PADDLE_ENFORCE_EQ(
            expand_shape[i], -1,
            platform::errors::InvalidArgument(
                "When the value in shape is negative for expand_v2 op, "
                "only -1 is supported, but the value received is %d.",
                expand_shape[i]));
        final_expand_shape[i] = vec_in_dims[i];
      }
    }

    framework::DDim out_dims = framework::make_ddim(final_expand_shape);
    Out->Resize(out_dims);
    Out->mutable_data<T>(context.GetPlace());
    auto& x_shape = vec_in_dims;
    auto out_shape = framework::vectorize<int>(out_dims);

    const auto& dev_ctx =
        context.template device_context<paddle::platform::XPUDeviceContext>();
    int r = XPU_SUCCESS;

    // bool tensors take a dedicated one-byte code path; every other dtype is
    // broadcast as its XPUType.
    if (std::is_same<T, bool>::value) {
      auto x_data = reinterpret_cast<const int8_t*>(X->data<T>());
      auto out_data = reinterpret_cast<int8_t*>(Out->data<T>());
      r = xpu::broadcast<int8_t>(dev_ctx.x_context(), x_data, out_data,
                                 x_shape, out_shape);
    } else {
      auto x_data = reinterpret_cast<const XPUType*>(X->data<T>());
      auto out_data = reinterpret_cast<XPUType*>(Out->data<T>());
      r = xpu::broadcast<XPUType>(dev_ctx.x_context(), x_data, out_data,
                                  x_shape, out_shape);
    }
    PADDLE_ENFORCE_EQ(r, XPU_SUCCESS, platform::errors::External(
                                          "XPU API(broadcast) return wrong "
                                          "value[%d %s] in ExpandV2XPUKernel.",
                                          r, XPUAPIErrorMsg[r]));
  }
};

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;
REGISTER_OP_XPU_KERNEL(expand_v2, ops::ExpandV2XPUKernel<float>,
                       ops::ExpandV2XPUKernel<paddle::platform::float16>,
                       ops::ExpandV2XPUKernel<bool>,
                       ops::ExpandV2XPUKernel<int>,
                       ops::ExpandV2XPUKernel<int64_t>);

#endif
b/paddle/fluid/platform/device/xpu/xpu1_op_list.h @@ -321,7 +321,18 @@ XPUOpMap& get_kl1_ops() { {"momuntem", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, {"iou_similarity", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, - {"arg_max", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})} + {"arg_max", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"expand_v2", XPUKernelSet({pOpKernelType(vartype::INT32, XPUPlace()), + pOpKernelType(vartype::INT64, XPUPlace()), + pOpKernelType(vartype::BOOL, XPUPlace()), + pOpKernelType(vartype::FP16, XPUPlace()), + pOpKernelType(vartype::FP32, XPUPlace())})}, + {"expand_as_v2", + XPUKernelSet({pOpKernelType(vartype::INT32, XPUPlace()), + pOpKernelType(vartype::INT64, XPUPlace()), + pOpKernelType(vartype::BOOL, XPUPlace()), + pOpKernelType(vartype::FP16, XPUPlace()), + pOpKernelType(vartype::FP32, XPUPlace())})}, // AddMore }; diff --git a/paddle/fluid/platform/device/xpu/xpu2_op_list.h b/paddle/fluid/platform/device/xpu/xpu2_op_list.h index 58109092fbda686da7090817c3de83dbbf47b415..78fc53cfc8535e70cbc978884dca2806514b7490 100644 --- a/paddle/fluid/platform/device/xpu/xpu2_op_list.h +++ b/paddle/fluid/platform/device/xpu/xpu2_op_list.h @@ -271,7 +271,24 @@ XPUOpMap& get_kl2_ops() { {"masked_select", XPUKernelSet({pOpKernelType(vartype::INT32, XPUPlace()), pOpKernelType(vartype::INT64, XPUPlace()), - pOpKernelType(vartype::FP32, XPUPlace())})} + pOpKernelType(vartype::FP32, XPUPlace())})}, + {"expand_v2", XPUKernelSet({pOpKernelType(vartype::INT32, XPUPlace()), + pOpKernelType(vartype::INT64, XPUPlace()), + pOpKernelType(vartype::BOOL, XPUPlace()), + pOpKernelType(vartype::FP16, XPUPlace()), + pOpKernelType(vartype::FP32, XPUPlace())})}, + {"expand_as_v2", + XPUKernelSet({pOpKernelType(vartype::INT32, XPUPlace()), + pOpKernelType(vartype::INT64, XPUPlace()), + pOpKernelType(vartype::BOOL, XPUPlace()), + pOpKernelType(vartype::FP16, XPUPlace()), + pOpKernelType(vartype::FP32, 
XPUPlace())})}, + {"depthwise_conv2d", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"depthwise_conv2d_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"conv2d", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"conv2d_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, // AddMore }; diff --git a/python/paddle/fluid/tests/unittests/xpu/test_expand_as_v2_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_expand_as_v2_op_xpu.py new file mode 100644 index 0000000000000000000000000000000000000000..6ff13f902841503f1a230e4c7a0dd2a1accb9b73 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/xpu/test_expand_as_v2_op_xpu.py @@ -0,0 +1,240 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import print_function + +import numpy as np +import unittest +import sys +sys.path.append("..") +from op_test import OpTest +from op_test_xpu import XPUOpTest +import paddle +import paddle.fluid as fluid + +paddle.enable_static() +np.random.seed(10) + + +class TestExpandAsOpRank1(XPUOpTest): + def setUp(self): + self.set_xpu() + self.place = paddle.XPUPlace(0) + self.op_type = "expand_as_v2" + x = np.random.rand(100).astype("float32") + target_tensor = np.random.rand(2, 100).astype("float32") + self.inputs = {'X': x} + self.attrs = {'target_shape': target_tensor.shape} + bcast_dims = [2, 1] + output = np.tile(self.inputs['X'], bcast_dims) + self.outputs = {'Out': output} + + def set_xpu(self): + self.__class__.use_xpu = True + + def test_check_output(self): + self.check_output_with_place(self.place) + + def test_check_grad(self): + pass + + +class TestExpandAsOpRank2(XPUOpTest): + def setUp(self): + self.set_xpu() + self.place = paddle.XPUPlace(0) + self.op_type = "expand_as_v2" + x = np.random.rand(10, 12).astype("float32") + target_tensor = np.random.rand(10, 12).astype("float32") + self.inputs = {'X': x} + self.attrs = {'target_shape': target_tensor.shape} + bcast_dims = [1, 1] + output = np.tile(self.inputs['X'], bcast_dims) + self.outputs = {'Out': output} + + def set_xpu(self): + self.__class__.use_xpu = True + + def test_check_output(self): + self.check_output_with_place(self.place) + + def test_check_grad(self): + pass + + +class TestExpandAsOpRank3(XPUOpTest): + def setUp(self): + self.set_xpu() + self.place = paddle.XPUPlace(0) + self.op_type = "expand_as_v2" + x = np.random.rand(2, 3, 20).astype("float32") + target_tensor = np.random.rand(2, 3, 20).astype("float32") + self.inputs = {'X': x} + self.attrs = {'target_shape': target_tensor.shape} + bcast_dims = [1, 1, 1] + output = np.tile(self.inputs['X'], bcast_dims) + self.outputs = {'Out': output} + + def set_xpu(self): + self.__class__.use_xpu = True + + def test_check_output(self): + 
self.check_output_with_place(self.place) + + def test_check_grad(self): + pass + + +class TestExpandAsOpRank4(XPUOpTest): + def setUp(self): + self.set_xpu() + self.place = paddle.XPUPlace(0) + self.op_type = "expand_as_v2" + x = np.random.rand(1, 1, 7, 16).astype("float32") + target_tensor = np.random.rand(4, 6, 7, 16).astype("float32") + self.inputs = {'X': x} + self.attrs = {'target_shape': target_tensor.shape} + bcast_dims = [4, 6, 1, 1] + output = np.tile(self.inputs['X'], bcast_dims) + self.outputs = {'Out': output} + + def set_xpu(self): + self.__class__.use_xpu = True + + def test_check_output(self): + self.check_output_with_place(self.place) + + def test_check_grad(self): + pass + + +class TestExpandAsOpRank5(XPUOpTest): + def setUp(self): + self.set_xpu() + self.place = paddle.XPUPlace(0) + self.op_type = "expand_as_v2" + x = np.random.rand(1, 1, 7, 16).astype("int32") + target_tensor = np.random.rand(4, 6, 7, 16).astype("int32") + self.inputs = {'X': x} + self.attrs = {'target_shape': target_tensor.shape} + bcast_dims = [4, 6, 1, 1] + output = np.tile(self.inputs['X'], bcast_dims) + self.outputs = {'Out': output} + + def set_xpu(self): + self.__class__.use_xpu = True + + def test_check_output(self): + self.check_output_with_place(self.place) + + def test_check_grad(self): + pass + + +class TestExpandAsOpRank6(XPUOpTest): + def setUp(self): + self.set_xpu() + self.place = paddle.XPUPlace(0) + self.op_type = "expand_as_v2" + x = np.random.rand(1, 1, 7, 16).astype("int64") + target_tensor = np.random.rand(4, 6, 7, 16).astype("int64") + self.inputs = {'X': x} + self.attrs = {'target_shape': target_tensor.shape} + bcast_dims = [4, 6, 1, 1] + output = np.tile(self.inputs['X'], bcast_dims) + self.outputs = {'Out': output} + + def set_xpu(self): + self.__class__.use_xpu = True + + def test_check_output(self): + self.check_output_with_place(self.place) + + def test_check_grad(self): + pass + + +class TestExpandAsOpRank6BOOL(XPUOpTest): + def setUp(self): + 
self.set_xpu() + self.place = paddle.XPUPlace(0) + self.op_type = "expand_as_v2" + x = np.random.rand(1, 1, 7, 16).astype("bool") + target_tensor = np.random.rand(4, 6, 7, 16).astype("bool") + self.inputs = {'X': x} + self.attrs = {'target_shape': target_tensor.shape} + bcast_dims = [4, 6, 1, 1] + output = np.tile(self.inputs['X'], bcast_dims) + self.outputs = {'Out': output} + + def set_xpu(self): + self.__class__.use_xpu = True + + def test_check_output(self): + self.check_output_with_place(self.place) + + def test_check_grad(self): + pass + + +class TestExpandAsOpRank6FP16(XPUOpTest): + def setUp(self): + self.set_xpu() + self.place = paddle.XPUPlace(0) + self.op_type = "expand_as_v2" + x = np.random.rand(1, 1, 7, 16).astype("float16") + target_tensor = np.random.rand(4, 6, 7, 16).astype("float16") + self.inputs = {'X': x} + self.attrs = {'target_shape': target_tensor.shape} + bcast_dims = [4, 6, 1, 1] + output = np.tile(self.inputs['X'], bcast_dims) + self.outputs = {'Out': output} + + def set_xpu(self): + self.__class__.use_xpu = True + self.__class__.no_need_check_grad = True + + def test_check_output(self): + self.check_output_with_place(self.place) + + def test_check_grad(self): + pass + + +# Test python API +class TestExpandAsV2API(unittest.TestCase): + def test_api(self): + input1 = np.random.random([12, 14]).astype("float32") + input2 = np.random.random([2, 12, 14]).astype("float32") + x = fluid.layers.data( + name='x', shape=[12, 14], append_batch_size=False, dtype="float32") + + y = fluid.layers.data( + name='target_tensor', + shape=[2, 12, 14], + append_batch_size=False, + dtype="float32") + + out_1 = paddle.expand_as(x, y=y) + + exe = fluid.Executor(place=fluid.XPUPlace(0)) + res_1 = exe.run(fluid.default_main_program(), + feed={"x": input1, + "target_tensor": input2}, + fetch_list=[out_1]) + assert np.array_equal(res_1[0], np.tile(input1, (2, 1, 1))) + + +if __name__ == '__main__': + unittest.main() diff --git 
a/python/paddle/fluid/tests/unittests/xpu/test_expand_v2_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_expand_v2_op_xpu.py new file mode 100644 index 0000000000000000000000000000000000000000..810d212a59a0d5357eeebb4e571a2d9326c145f3 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/xpu/test_expand_v2_op_xpu.py @@ -0,0 +1,263 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function +import unittest +import sys +import numpy as np +sys.path.append("..") +from op_test import OpTest +from op_test_xpu import XPUOpTest +import paddle.fluid as fluid +from paddle.fluid import Program, program_guard +import paddle + +paddle.enable_static() +np.random.seed(10) + + +# CANN Op Support X: float32, int32, int64 +# Situation 1: shape is a list(without tensor) +class TestExpandV2XPUOpRank1(XPUOpTest): + def setUp(self): + self.set_xpu() + self.place = paddle.XPUPlace(0) + self.op_type = "expand_v2" + self.dtype = np.float32 + self.init_data() + + self.inputs = {'X': np.random.random(self.ori_shape).astype(self.dtype)} + self.attrs = {'shape': self.shape} + output = np.tile(self.inputs['X'], self.expand_times) + self.outputs = {'Out': output} + + def set_xpu(self): + self.__class__.use_xpu = True + + def init_data(self): + self.ori_shape = [100] + self.shape = [100] + self.expand_times = [1] + + def test_check_output(self): + self.check_output_with_place(self.place) + + 
def test_check_grad(self): + pass + + +class TestExpandV2OpRank2_DimExpanding(TestExpandV2XPUOpRank1): + def init_data(self): + self.ori_shape = [120] + self.shape = [2, 120] + self.expand_times = [2, 1] + + +class TestExpandV2OpRank2(TestExpandV2XPUOpRank1): + def init_data(self): + self.ori_shape = [1, 140] + self.shape = [12, 140] + self.expand_times = [12, 1] + + +class TestExpandV2OpRank3_Corner(TestExpandV2XPUOpRank1): + def init_data(self): + self.ori_shape = (2, 10, 5) + self.shape = (2, 10, 5) + self.expand_times = (1, 1, 1) + + +class TestExpandV2OpRank4(TestExpandV2XPUOpRank1): + def init_data(self): + self.ori_shape = (2, 4, 5, 7) + self.shape = (-1, -1, -1, -1) + self.expand_times = (1, 1, 1, 1) + + +class TestExpandV2OpRank5(TestExpandV2XPUOpRank1): + def init_data(self): + self.ori_shape = (2, 4, 1, 15) + self.shape = (2, -1, 4, -1) + self.expand_times = (1, 1, 4, 1) + + +class TestExpandV2OpRank6(TestExpandV2XPUOpRank1): + def init_data(self): + self.ori_shape = (4, 1, 30) + self.shape = (2, -1, 4, 30) + self.expand_times = (2, 1, 4, 1) + + +# Situation 2: shape is a list(with tensor) +class TestExpandV2OpXPURank1_tensor_attr(XPUOpTest): + def setUp(self): + self.set_xpu() + self.place = paddle.XPUPlace(0) + self.op_type = "expand_v2" + self.init_data() + self.dtype = np.float32 + expand_shapes_tensor = [] + for index, ele in enumerate(self.expand_shape): + expand_shapes_tensor.append(("x" + str(index), np.ones( + (1)).astype('int32') * ele)) + + self.inputs = { + 'X': np.random.random(self.ori_shape).astype(self.dtype), + 'expand_shapes_tensor': expand_shapes_tensor, + } + self.attrs = {"shape": self.infer_expand_shape} + output = np.tile(self.inputs['X'], self.expand_times) + self.outputs = {'Out': output} + + def set_xpu(self): + self.__class__.use_xpu = True + + def init_data(self): + self.ori_shape = [100] + self.expand_times = [1] + self.expand_shape = [100] + self.infer_expand_shape = [-1] + + def test_check_output(self): + 
self.check_output_with_place(self.place) + + def test_check_grad(self): + pass + + +class TestExpandV2OpRank2_Corner_tensor_attr( + TestExpandV2OpXPURank1_tensor_attr): + def init_data(self): + self.ori_shape = [12, 14] + self.expand_times = [1, 1] + self.expand_shape = [12, 14] + self.infer_expand_shape = [12, -1] + + +# Situation 3: shape is a tensor +class TestExpandV2XPUOpRank1_tensor(XPUOpTest): + def setUp(self): + self.set_xpu() + self.place = paddle.XPUPlace(0) + self.op_type = "expand_v2" + self.init_data() + self.dtype = np.float32 + + self.inputs = { + 'X': np.random.random(self.ori_shape).astype(self.dtype), + 'Shape': np.array(self.expand_shape).astype("int32"), + } + self.attrs = {} + output = np.tile(self.inputs['X'], self.expand_times) + self.outputs = {'Out': output} + + def set_xpu(self): + self.__class__.use_xpu = True + + def init_data(self): + self.ori_shape = [100] + self.expand_times = [2, 1] + self.expand_shape = [2, 100] + + def test_check_output(self): + self.check_output_with_place(self.place) + + def test_check_grad(self): + pass + + +# Situation 5: input x is int32 +# skip grad check for int32 +class TestExpandV2OpInteger(XPUOpTest): + def init_type(self): + self.dtype = 'int32' + + def setUp(self): + self.set_xpu() + self.init_type() + self.place = paddle.XPUPlace(0) + self.op_type = "expand_v2" + self.inputs = { + 'X': np.random.randint( + 10, size=(2, 4, 20)).astype(self.dtype) + } + self.attrs = {'shape': [2, 4, 20]} + output = np.tile(self.inputs['X'], (1, 1, 1)) + self.outputs = {'Out': output} + + def set_xpu(self): + self.__class__.use_xpu = True + + def test_check_output(self): + self.check_output_with_place(self.place) + + def test_check_grad(self): + pass + + +class TesstExpandV2OpInt64(TestExpandV2OpInteger): + def init_dtype(self): + self.dtype = 'int64' + + +class TesstExpandV2OpBool(TestExpandV2OpInteger): + def init_dtype(self): + self.dtype = 'bool' + + +class TesstExpandV2OpFP16(TestExpandV2OpInteger): + def 
init_dtype(self): + self.dtype = 'float16' + + +# Test python API +class TestExpandV2API(unittest.TestCase): + def test_static(self): + with fluid.program_guard(fluid.Program(), fluid.Program()): + input = np.random.random([12, 14]).astype("float32") + x = fluid.layers.data( + name='x', + shape=[12, 14], + append_batch_size=False, + dtype="float32") + + positive_2 = fluid.layers.fill_constant([1], "int32", 12) + expand_shape = fluid.layers.data( + name="expand_shape", + shape=[2], + append_batch_size=False, + dtype="int32") + + out_1 = paddle.expand(x, shape=[12, 14]) + out_2 = paddle.expand(x, shape=[positive_2, 14]) + out_3 = paddle.expand(x, shape=expand_shape) + + g0 = fluid.backward.calc_gradient(out_2, x) + + exe = fluid.Executor(place=paddle.XPUPlace(0)) + res_1, res_2, res_3 = exe.run(fluid.default_main_program(), + feed={ + "x": input, + "expand_shape": + np.array([12, 14]).astype("int32") + }, + fetch_list=[out_1, out_2, out_3]) + + assert np.array_equal(res_1, np.tile(input, (1, 1))) + assert np.array_equal(res_2, np.tile(input, (1, 1))) + assert np.array_equal(res_3, np.tile(input, (1, 1))) + + +if __name__ == "__main__": + unittest.main()