From 840d54de9b1eac48fa5a871eecb703f0b598e558 Mon Sep 17 00:00:00 2001
From: mapingshuo <mps2012@yeah.net>
Date: Mon, 12 Oct 2020 10:29:30 +0800
Subject: [PATCH] add XPU support for shape op and reshape op (#27804)

---
 paddle/fluid/operators/reshape_op.cc          |  37 +++-
 paddle/fluid/operators/shape_op_xpu.cc        |  21 ++
 .../unittests/xpu/test_reshape2_op_xpu.py     | 207 ++++++++++++++++++
 .../tests/unittests/xpu/test_shape_op_xpu.py  |  94 ++++++++
 4 files changed, 354 insertions(+), 5 deletions(-)
 create mode 100644 paddle/fluid/operators/shape_op_xpu.cc
 create mode 100644 python/paddle/fluid/tests/unittests/xpu/test_reshape2_op_xpu.py
 create mode 100644 python/paddle/fluid/tests/unittests/xpu/test_shape_op_xpu.py

diff --git a/paddle/fluid/operators/reshape_op.cc b/paddle/fluid/operators/reshape_op.cc
index e03824ca8c3..05bb37ee421 100644
--- a/paddle/fluid/operators/reshape_op.cc
+++ b/paddle/fluid/operators/reshape_op.cc
@@ -49,7 +49,8 @@ inline std::vector<int> get_new_shape(
             "the element's shape must be [1]. But received the element's shape "
             "is [%s]",
             tensor->dims()));
-    if (platform::is_gpu_place(tensor->place())) {
+    if (platform::is_gpu_place(tensor->place()) ||
+        platform::is_xpu_place(tensor->place())) {
       framework::Tensor temp;
       TensorCopySync(*tensor, platform::CPUPlace(), &temp);
 
@@ -362,7 +363,8 @@ class ReshapeKernel {
     if (shape_tensor) {
       auto *shape_data = shape_tensor->data<int>();
       framework::Tensor cpu_shape_tensor;
-      if (platform::is_gpu_place(shape_tensor->place())) {
+      if (platform::is_gpu_place(shape_tensor->place()) ||
+          platform::is_xpu_place(shape_tensor->place())) {
         TensorCopySync(*shape_tensor, platform::CPUPlace(),
                        &cpu_shape_tensor);
         shape_data = cpu_shape_tensor.data<int>();
@@ -375,9 +377,22 @@ class ReshapeKernel {
 
     out->Resize(out_dims);
     out->mutable_data(ctx.GetPlace(), in->type());
-    framework::TensorCopy(
-        *in, ctx.GetPlace(),
-        ctx.template device_context<platform::DeviceContext>(), out);
+
+#ifdef PADDLE_WITH_XPU
+    if (platform::is_xpu_place(ctx.GetPlace())) {
+      auto &dev_ctx =
+          ctx.template device_context<paddle::platform::XPUDeviceContext>();
+      xpu::memcpy_device(
+          dev_ctx.x_context(), out->data<void>(), in->data<void>(),
+          in->numel() * paddle::framework::SizeOfType(in->type()));
+    } else {
+#endif
+      framework::TensorCopy(
+          *in, ctx.GetPlace(),
+          ctx.template device_context<platform::DeviceContext>(), out);
+#ifdef PADDLE_WITH_XPU
+    }
+#endif
     out->Resize(out_dims);
   }
 };
@@ -644,3 +659,15 @@ REGISTER_OP_CUDA_KERNEL_FUNCTOR(reshape2_grad_grad, float,
                                 ops::ReshapeDoubleGradKernel, plat::float16,
                                 ops::ReshapeDoubleGradKernel);
 #endif
+
+#ifdef PADDLE_WITH_XPU
+REGISTER_OP_XPU_KERNEL_FUNCTOR(reshape2, float, ops::ReshapeKernel, double,
+                               ops::ReshapeKernel, int, ops::ReshapeKernel,
+                               int64_t, ops::ReshapeKernel, plat::float16,
+                               ops::ReshapeKernel);
+REGISTER_OP_XPU_KERNEL_FUNCTOR(reshape2_grad, float, ops::ReshapeGradKernel,
+                               double, ops::ReshapeGradKernel, int,
+                               ops::ReshapeGradKernel, int64_t,
+                               ops::ReshapeGradKernel, plat::float16,
+                               ops::ReshapeGradKernel);
+#endif
diff --git a/paddle/fluid/operators/shape_op_xpu.cc b/paddle/fluid/operators/shape_op_xpu.cc
new file mode 100644
index 00000000000..2e9092a6432
--- /dev/null
+++ b/paddle/fluid/operators/shape_op_xpu.cc
@@ -0,0 +1,21 @@
+/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License. */
+
+#ifdef PADDLE_WITH_XPU
+
+#include "paddle/fluid/operators/shape_op.h"
+
+namespace ops = paddle::operators;
+REGISTER_OP_XPU_KERNEL(shape, ops::ShapeKernel<bool>, ops::ShapeKernel<int>,
+                       ops::ShapeKernel<int64_t>, ops::ShapeKernel<float>,
+                       ops::ShapeKernel<double>);
+
+#endif
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_reshape2_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_reshape2_op_xpu.py
new file mode 100644
index 00000000000..1a21b0f1972
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/xpu/test_reshape2_op_xpu.py
@@ -0,0 +1,207 @@
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import unittest
+import numpy as np
+import sys
+
+sys.path.append("..")
+from op_test import OpTest
+import paddle
+import paddle.fluid as fluid
+from paddle.fluid import compiler, Program, program_guard
+
+
+# situation 1: have shape( list, no tensor), no actual shape(Tensor)
+class TestReshapeOp(OpTest):
+    def setUp(self):
+        self.init_data()
+        self.op_type = "reshape2"
+        self.inputs = {"X": np.random.random(self.ori_shape).astype("float32")}
+        self.attrs = {"shape": self.new_shape, "use_xpu": True}
+        self.outputs = {
+            "Out": self.inputs["X"].reshape(self.infered_shape),
+            'XShape': np.random.random(self.ori_shape).astype("float32")
+        }
+
+    def init_data(self):
+        self.ori_shape = (2, 60)
+        self.new_shape = (12, 10)
+        self.infered_shape = (12, 10)
+
+    def test_check_output(self):
+        if paddle.is_compiled_with_xpu():
+            place = paddle.XPUPlace(0)
+            self.check_output_with_place(place, no_check_set=['XShape'])
+
+    def test_check_grad(self):
+        if paddle.is_compiled_with_xpu():
+            place = paddle.XPUPlace(0)
+            self.check_grad_with_place(place, ["X"], "Out")
+
+
+class TestReshapeOpDimInfer1(TestReshapeOp):
+    def init_data(self):
+        self.ori_shape = (5, 25)
+        self.new_shape = (5, -1, 5)
+        self.infered_shape = (5, -1, 5)
+
+
+class TestReshapeOpDimInfer2(TestReshapeOp):
+    def init_data(self):
+        self.ori_shape = (10, 2, 6)
+        self.new_shape = (10, 0, 3, -1)
+        self.infered_shape = (10, 2, 3, -1)
+
+
+# situation 2: have shape(list, no tensor), have actual shape(Tensor)
+class TestReshapeOpWithInputShape(OpTest):
+    def setUp(self):
+        self.init_data()
+        self.op_type = "reshape2"
+
+        self.inputs = {
+            "X": np.random.random(self.ori_shape).astype("float32"),
+            "Shape": np.array(
+                self.actual_shape, dtype="int32")
+        }
+        self.attrs = {"shape": self.new_shape, "use_xpu": True}
+        self.outputs = {
+            "Out": self.inputs["X"].reshape(self.actual_shape),
+            'XShape': np.random.random(self.ori_shape).astype("float32")
+        }
+
+    def init_data(self):
+        self.ori_shape = (6, 20)
+        self.new_shape = (0, -1, 20)
+        self.actual_shape = (2, 3, 20)
+
+    def test_check_output(self):
+        if paddle.is_compiled_with_xpu():
+            place = paddle.XPUPlace(0)
+            self.check_output_with_place(place, no_check_set=['XShape'])
+
+    def test_check_grad(self):
+        if paddle.is_compiled_with_xpu():
+            place = paddle.XPUPlace(0)
+            self.check_grad_with_place(place, ["X"], "Out")
+
+
+# Situation 3: have shape(list, have tensor), no actual shape(Tensor)
+class TestReshapeOp_attr_ShapeTensor(OpTest):
+    def setUp(self):
+        self.init_data()
+        self.op_type = "reshape2"
+
+        shape_tensor = []
+        for index, ele in enumerate(self.new_shape):
+            shape_tensor.append(("x" + str(index), np.ones(
+                (1)).astype('int32') * ele))
+
+        self.inputs = {
+            "X": np.random.random(self.ori_shape).astype("float32"),
+            'ShapeTensor': shape_tensor
+        }
+        self.attrs = {'shape': self.shape, "use_xpu": True}
+        self.outputs = {
+            "Out": self.inputs["X"].reshape(self.infered_shape),
+            'XShape': np.random.random(self.ori_shape).astype("float32")
+        }
+
+    def init_data(self):
+        self.ori_shape = (4, 25)
+        self.new_shape = (10, 10)
+        self.infered_shape = (10, 10)
+        self.shape = (-1, -1)
+
+    def test_check_output(self):
+        if paddle.is_compiled_with_xpu():
+            place = paddle.XPUPlace(0)
+            self.check_output_with_place(place, no_check_set=['XShape'])
+
+    def test_check_grad(self):
+        if paddle.is_compiled_with_xpu():
+            place = paddle.XPUPlace(0)
+            self.check_grad_with_place(place, ["X"], "Out")
+
+
+class TestReshapeOpDimInfer1_attr_ShapeTensor(TestReshapeOp_attr_ShapeTensor):
+    def init_data(self):
+        self.ori_shape = (5, 20)
+        self.new_shape = (5, -1, 20)
+        self.infered_shape = (5, -1, 20)
+        self.shape = (5, -1, -1)
+
+
+class TestReshapeOpDimInfer2_attr_ShapeTensor(TestReshapeOp_attr_ShapeTensor):
+    def init_data(self):
+        self.ori_shape = (10, 2, 6)
+        self.new_shape = (10, 0, 3, -1)
+        self.infered_shape = (10, 2, 3, -1)
+        self.shape = (10, 0, 3, -1)
+
+
+# Situation 4: have shape(Tensor), no actual shape(Tensor)
+class TestReshapeOp_attr_OnlyShape(OpTest):
+    def setUp(self):
+        self.init_data()
+        self.op_type = "reshape2"
+
+        self.inputs = {
+            "X": np.random.random(self.ori_shape).astype("float32"),
+            "Shape": np.array(
+                self.new_shape, dtype="int32")
+        }
+        self.attrs = {"use_xpu": True}
+        self.outputs = {
+            "Out": self.inputs["X"].reshape(self.infered_shape),
+            'XShape': np.random.random(self.ori_shape).astype("float32")
+        }
+
+    def init_data(self):
+        self.ori_shape = (4, 25)
+        self.new_shape = (10, 10)
+        self.infered_shape = (10, 10)
+
+    def test_check_output(self):
+        if paddle.is_compiled_with_xpu():
+            place = paddle.XPUPlace(0)
+            self.check_output_with_place(place, no_check_set=['XShape'])
+
+    def test_check_grad(self):
+        if paddle.is_compiled_with_xpu():
+            place = paddle.XPUPlace(0)
+            self.check_grad_with_place(place, ["X"], "Out")
+
+
+class TestReshapeOpDimInfer1_attr_OnlyShape(TestReshapeOp_attr_OnlyShape):
+    def init_data(self):
+        self.ori_shape = (5, 20)
+        self.new_shape = (5, -1, 10)
+        self.infered_shape = (5, -1, 10)
+        self.shape = (5, -1, -1)
+
+
+class TestReshapeOpDimInfer2_attr_OnlyShape(TestReshapeOp_attr_OnlyShape):
+    def init_data(self):
+        self.ori_shape = (10, 2, 6)
+        self.new_shape = (10, 0, 3, -1)
+        self.infered_shape = (10, 2, 3, -1)
+        self.shape = (10, 0, 3, -1)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_shape_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_shape_op_xpu.py
new file mode 100644
index 00000000000..f194f3ca80c
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/xpu/test_shape_op_xpu.py
@@ -0,0 +1,94 @@
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import unittest
+import numpy as np
+import sys
+sys.path.append("..")
+from op_test import OpTest
+import paddle
+from paddle.fluid import core
+from paddle.fluid.op import Operator
+
+
+class TestShapeOp(OpTest):
+    def setUp(self):
+        self.op_type = "shape"
+        self.config()
+        self.shape = [2, 3]
+        input = np.zeros(self.shape)
+        self.inputs = {'Input': input}
+        self.outputs = {'Out': np.array(self.shape)}
+
+    def config(self):
+        self.shape = [2, 3]
+
+    def test_check_output(self):
+        if paddle.is_compiled_with_xpu():
+            place = paddle.XPUPlace(0)
+            self.check_output_with_place(place)
+
+
+class case1(TestShapeOp):
+    def config(self):
+        self.shape = [2]
+
+
+class case2(TestShapeOp):
+    def config(self):
+        self.shape = [1, 2, 3]
+
+
+class TestShapeWithSelectedRows(unittest.TestCase):
+    def get_places(self):
+        places = [core.CPUPlace()]
+        if core.is_compiled_with_cuda():
+            places.append(core.CUDAPlace(0))
+        if core.is_compiled_with_xpu():
+            places.append(core.XPUPlace(0))
+        return places
+
+    def check_with_place(self, place):
+        scope = core.Scope()
+        x_rows = [0, 1, 5, 4, 19]
+        height = 20
+        row_numel = 2
+
+        np_array = np.ones((len(x_rows), row_numel)).astype("float32")
+
+        # initialize input variable X
+        x = scope.var('X').get_selected_rows()
+        x.set_rows(x_rows)
+        x.set_height(height)
+        x_tensor = x.get_tensor()
+        x_tensor.set(np_array, place)
+
+        # initialize input variable Out
+        out_shape = scope.var("Out").get_tensor()
+        op = Operator("shape", Input="X", Out="Out")
+
+        op.run(scope, place)
+
+        out_shape = np.array(out_shape).tolist()
+        self.assertListEqual([5, 2], out_shape)
+
+    def test_check_output(self):
+        for place in self.get_places():
+            self.check_with_place(place)
+
+
+if __name__ == '__main__':
+    unittest.main()
--
GitLab
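For a quick manual check of the two new kernels outside the bundled unit tests, the snippet below builds a small static-graph program that runs reshape2 followed by shape and executes it on an XPU place. This is a minimal sketch, not part of the patch; it assumes a Paddle build with XPU support and a visible XPU device 0, and it only relies on public entry points the tests above already exercise (paddle.is_compiled_with_xpu, paddle.XPUPlace) plus the standard fluid Program/Executor API.

import numpy as np
import paddle
import paddle.fluid as fluid

# Build a small static program: feed x, reshape it, then take its shape.
paddle.enable_static()
main_prog, startup_prog = fluid.Program(), fluid.Program()
with fluid.program_guard(main_prog, startup_prog):
    x = fluid.data(name="x", shape=[2, 60], dtype="float32")
    y = fluid.layers.reshape(x, shape=[12, 10])  # reshape2 op
    s = fluid.layers.shape(y)                    # shape op

if paddle.is_compiled_with_xpu():
    # Assumption: XPU device 0 is present; running on XPUPlace routes both
    # ops to the newly registered XPU kernels.
    exe = fluid.Executor(paddle.XPUPlace(0))
    exe.run(startup_prog)
    out, = exe.run(main_prog,
                   feed={"x": np.random.random([2, 60]).astype("float32")},
                   fetch_list=[s])
    print(out)  # expected: [12 10]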