From 11adb0f373d20c598701fafa41f715d4db37672f Mon Sep 17 00:00:00 2001 From: TeslaZhao Date: Tue, 20 Oct 2020 07:47:48 +0800 Subject: [PATCH] [cherry-pick] Add xpu transpose2 op.test=kunlun (#28096) --- paddle/fluid/operators/transpose_op_xpu.cc | 192 +++++++++++++++ .../unittests/xpu/test_transpose_op_xpu.py | 230 ++++++++++++++++++ 2 files changed, 422 insertions(+) create mode 100644 paddle/fluid/operators/transpose_op_xpu.cc create mode 100644 python/paddle/fluid/tests/unittests/xpu/test_transpose_op_xpu.py diff --git a/paddle/fluid/operators/transpose_op_xpu.cc b/paddle/fluid/operators/transpose_op_xpu.cc new file mode 100644 index 0000000000..c7ecf2ebfa --- /dev/null +++ b/paddle/fluid/operators/transpose_op_xpu.cc @@ -0,0 +1,192 @@ +/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/
+
+#ifdef PADDLE_WITH_XPU
+#include "paddle/fluid/operators/transpose_op.h"
+#include <algorithm>
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace paddle {
+namespace operators {
+
+using framework::Tensor;
+
+// Reports whether the XPU transpose kernel can be used for the permutation
+// `axis` of an `ndims`-dimensional tensor.
+//
+// Returns true only when `axis` has exactly `ndims` entries and equals one of
+// the permutations listed in the table below. Any other rank or permutation
+// yields false (it never throws), so that callers can fall back to another
+// implementation instead of failing.
+bool XPUSupported(int ndims, const std::vector<int>& axis) {
+  // Permutations XPU currently supports, grouped by rank.
+  static const std::vector<std::vector<int>> kSupportedPermutes = {
+      // rank 2
+      {1, 0},
+      // rank 3
+      {1, 0, 2},
+      {0, 2, 1},
+      {2, 1, 0},
+      // rank 4
+      {0, 2, 1, 3},
+      {0, 2, 3, 1},
+      {0, 3, 1, 2},
+      {3, 2, 0, 1}};
+
+  // A permutation whose length disagrees with the rank can never match.
+  if (ndims < 0 || static_cast<size_t>(ndims) != axis.size()) {
+    return false;
+  }
+  return std::find(kSupportedPermutes.begin(), kSupportedPermutes.end(),
+                   axis) != kSupportedPermutes.end();
+}
+
+// Host fallback for permutations XPUSupported() rejects: copies `in` from
+// `xpu_place` to CPU memory, permutes it there with TransCompute, and copies
+// the result back into `out`.
+//
+// `out` must already hold an allocation of type T on `xpu_place`; its dims
+// are used as the shape of the permuted result.
+template <typename T>
+void TransposeOnCPU(const platform::XPUPlace& xpu_place,
+                    const framework::Tensor& in, const std::vector<int>& axis,
+                    framework::Tensor* out) {
+  VLOG(0) << "XPU does not support the permute, try to do on cpu";
+  const platform::CPUPlace cpu_place;
+  framework::Tensor in_cpu;
+  framework::Tensor out_cpu;
+  T* in_cpu_data = in_cpu.mutable_data<T>(in.dims(), cpu_place);
+  T* out_cpu_data = out_cpu.mutable_data<T>(out->dims(), cpu_place);
+  memory::Copy(cpu_place, static_cast<void*>(in_cpu_data), xpu_place,
+               static_cast<const void*>(in.data<T>()),
+               in.numel() * sizeof(T));
+
+  const auto* cpu_dev_ctx = static_cast<const platform::CPUDeviceContext*>(
+      platform::DeviceContextPool::Instance().Get(cpu_place));
+  TransCompute<platform::CPUDeviceContext, T>(
+      static_cast<int>(axis.size()), *cpu_dev_ctx, in_cpu, &out_cpu, axis);
+  memory::Copy(xpu_place, static_cast<void*>(out->data<T>()), cpu_place,
+               static_cast<const void*>(out_cpu_data),
+               out->numel() * sizeof(T));
+}
+
+// Forward transpose kernel: permutes input "X" into output "Out" according to
+// the "axis" attribute. Uses xpu::transpose when the permutation is supported
+// and the CPU fallback otherwise.
+template <typename DeviceContext, typename T>
+class TransposeXPUKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& context) const override {
+    const auto* x = context.Input<framework::Tensor>("X");
+    auto* out = context.Output<framework::Tensor>("Out");
+    // axis is permute
+    auto axis = context.Attr<std::vector<int>>("axis");
+    const int ndims = static_cast<int>(axis.size());
+
+    const T* x_data = x->data<T>();
+    T* y_data = out->mutable_data<T>(context.GetPlace());
+    if (!XPUSupported(ndims, axis)) {
+      TransposeOnCPU<T>(
+          BOOST_GET_CONST(platform::XPUPlace, context.GetPlace()), *x, axis,
+          out);
+      return;
+    }
+
+    // xpu::transpose takes the shape as a host-side int array.
+    const auto x_dims = x->dims();
+    std::vector<int> x_shape_host(ndims, 0);
+    for (int i = 0; i < ndims; ++i) {
+      x_shape_host[i] = static_cast<int>(x_dims[i]);
+    }
+    int* permute_host = axis.data();
+    auto& dev_ctx = context.template device_context<DeviceContext>();
+    int r = xpu::transpose(dev_ctx.x_context(), x_data, y_data,
+                           x_shape_host.data(), permute_host, ndims);
+    PADDLE_ENFORCE_EQ(
+        r, xpu::Error_t::SUCCESS,
+        platform::errors::External("XPU kernel error! error code=%d", r));
+  }
+};
+
+// Backward transpose kernel: the gradient of a transpose is the transpose of
+// the incoming gradient by the inverse permutation. Does nothing when the
+// "X" gradient output is absent.
+template <typename DeviceContext, typename T>
+class TransposeGradXPUKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& context) const override {
+    auto* out_grad =
+        context.Input<framework::Tensor>(framework::GradVarName("Out"));
+    auto* x_grad =
+        context.Output<framework::Tensor>(framework::GradVarName("X"));
+    if (!x_grad) return;
+
+    x_grad->mutable_data<T>(context.GetPlace());
+    std::vector<int> axis = context.Attr<std::vector<int>>("axis");
+    // Invert the permutation: if forward output dim i came from input dim
+    // axis[i], the gradient must send dim i back to dim axis[i].
+    std::vector<int> reversed_axis(axis);
+    for (size_t i = 0; i < axis.size(); i++) {
+      reversed_axis[axis[i]] = static_cast<int>(i);
+    }
+
+    const int ndims = static_cast<int>(axis.size());
+    if (!XPUSupported(ndims, reversed_axis)) {
+      // Mirror the forward kernel: an op whose forward pass ran through the
+      // CPU fallback must not fail in its backward pass.
+      TransposeOnCPU<T>(
+          BOOST_GET_CONST(platform::XPUPlace, context.GetPlace()), *out_grad,
+          reversed_axis, x_grad);
+      return;
+    }
+
+    std::vector<int> out_shape_host(ndims, 0);
+    for (int i = 0; i < ndims; ++i) {
+      out_shape_host[i] = static_cast<int>(out_grad->dims()[i]);
+    }
+    int* permute_host = reversed_axis.data();
+    auto& dev_ctx = context.template device_context<DeviceContext>();
+    int r = xpu::transpose(dev_ctx.x_context(), out_grad->data<T>(),
+                           x_grad->data<T>(), out_shape_host.data(),
+                           permute_host, ndims);
+    PADDLE_ENFORCE_EQ(
+        r, xpu::Error_t::SUCCESS,
+        platform::errors::External("XPU kernel error! error code=%d", r));
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+
+REGISTER_OP_XPU_KERNEL(
+    transpose,
+    ops::TransposeXPUKernel<paddle::platform::XPUDeviceContext, float>);
+REGISTER_OP_XPU_KERNEL(
+    transpose_grad,
+    ops::TransposeGradXPUKernel<paddle::platform::XPUDeviceContext, float>);
+REGISTER_OP_XPU_KERNEL(
+    transpose2,
+    ops::TransposeXPUKernel<paddle::platform::XPUDeviceContext, float>);
+REGISTER_OP_XPU_KERNEL(
+    transpose2_grad,
+    ops::TransposeGradXPUKernel<paddle::platform::XPUDeviceContext, float>);
+
+#endif  // PADDLE_WITH_XPU
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_transpose_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_transpose_op_xpu.py
new file mode 100644
index 0000000000..c191e5f0b2
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/xpu/test_transpose_op_xpu.py
@@ -0,0 +1,230 @@
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import unittest
+import numpy as np
+import sys
+
+sys.path.append("..")
+from op_test import OpTest
+import paddle
+import paddle.fluid as fluid
+from paddle.fluid import compiler, Program, program_guard
+
+
+# Skipping at class level (inherited by every TestCaseN subclass) makes a
+# non-XPU build report these tests as skipped rather than letting them pass
+# without checking anything.
+@unittest.skipIf(not paddle.is_compiled_with_xpu(),
+                 "core is not compiled with XPU")
+class TestXPUTransposeOp(OpTest):
+    """Checks the transpose2 op on XPUPlace(0) against numpy's transpose.
+
+    Subclasses override initTestCase() to supply a different input shape and
+    permutation.
+    """
+
+    def setUp(self):
+        self.init_op_type()
+        self.initTestCase()
+        self.inputs = {'X': np.random.random(self.shape).astype("float64")}
+        self.attrs = {
+            'axis': list(self.axis),
+            'use_mkldnn': False,
+            'use_xpu': True
+        }
+        self.outputs = {
+            # XShape is excluded from the output check below, so its value
+            # here is only a placeholder.
+            'XShape': np.random.random(self.shape).astype("float64"),
+            'Out': self.inputs['X'].transpose(self.axis)
+        }
+
+    def init_op_type(self):
+        self.op_type = "transpose2"
+        self.use_mkldnn = False
+
+    def test_check_output(self):
+        paddle.enable_static()
+        place = paddle.XPUPlace(0)
+        self.check_output_with_place(place=place, no_check_set=['XShape'])
+
+    def test_check_grad(self):
+        paddle.enable_static()
+        place = paddle.XPUPlace(0)
+        self.check_grad_with_place(place, ['X'], 'Out')
+
+    def initTestCase(self):
+        self.shape = (3, 40)
+        self.axis = (1, 0)
+
+
+class TestCase0(TestXPUTransposeOp):
+    def initTestCase(self):
+        self.shape = (100, )
+        self.axis = (0, )
+
+
+class TestCase1(TestXPUTransposeOp):
+    def initTestCase(self):
+        self.shape = (3, 4, 10)
+        self.axis = (0, 2, 1)
+
+
+class TestCase2(TestXPUTransposeOp):
+    def initTestCase(self):
+        self.shape = (2, 3, 4, 5)
+        self.axis = (0, 2, 3, 1)
+
+
+class TestCase3(TestXPUTransposeOp):
+    def initTestCase(self):
+        self.shape = (2, 3, 4, 5, 6)
+        self.axis = (4, 2, 3, 1, 0)
+
+
+class TestCase4(TestXPUTransposeOp):
+    def initTestCase(self):
+        self.shape = (2, 3, 4, 5, 6, 1)
+        self.axis = (4, 2, 3, 1, 0, 5)
+
+
+class TestCase5(TestXPUTransposeOp):
+    def initTestCase(self):
+        self.shape = (2, 16, 96)
+        self.axis = (0, 2, 1)
+
+
+class TestCase6(TestXPUTransposeOp):
+    def initTestCase(self):
+        self.shape = (2, 10, 12, 16)
+        self.axis = (3, 1, 2, 0)
+
+
+class TestCase7(TestXPUTransposeOp):
+    def initTestCase(self):
+        self.shape = (2, 10, 2, 16)
+        self.axis = (0, 1, 3, 2)
+
+
+class TestCase8(TestXPUTransposeOp):
+    def initTestCase(self):
+        self.shape = (2, 3, 2, 3, 2, 4, 3, 3)
+        self.axis = (0, 1, 3, 2, 4, 5, 6, 7)
+
+
+class TestCase9(TestXPUTransposeOp):
+    def initTestCase(self):
+        self.shape = (2, 3, 2, 3, 2, 4, 3, 3)
+        self.axis = (6, 1, 3, 5, 0, 2, 4, 7)
+
+
+class TestTransposeOpError(unittest.TestCase):
+    """Checks that fluid.layers.transpose rejects invalid arguments."""
+
+    def test_errors(self):
+        with program_guard(Program(), Program()):
+            x = fluid.layers.data(name='x', shape=[10, 5, 3], dtype='float64')
+
+            def test_x_Variable_check():
+                # the Input(x)'s type must be Variable
+                fluid.layers.transpose("not_variable", perm=[1, 0, 2])
+
+            self.assertRaises(TypeError, test_x_Variable_check)
+
+            def test_x_dtype_check():
+                # the Input(x)'s dtype must be one of [float16, float32, float64, int32, int64]
+                x1 = fluid.layers.data(
+                    name='x1', shape=[10, 5, 3], dtype='bool')
+                fluid.layers.transpose(x1, perm=[1, 0, 2])
+
+            self.assertRaises(TypeError, test_x_dtype_check)
+
+            def test_perm_list_check():
+                # Input(perm)'s type must be list
+                fluid.layers.transpose(x, perm="[1, 0, 2]")
+
+            self.assertRaises(TypeError, test_perm_list_check)
+
+            def test_perm_length_and_x_dim_check():
+                # Input(perm) is the permutation of dimensions of Input(input)
+                # its length should be equal to dimensions of Input(input)
+                fluid.layers.transpose(x, perm=[1, 0, 2, 3, 4])
+
+            self.assertRaises(ValueError, test_perm_length_and_x_dim_check)
+
+            def test_each_elem_value_check():
+                # Each element in Input(perm) should be less than Input(x)'s dimension
+                fluid.layers.transpose(x, perm=[3, 5, 7])
+
+            self.assertRaises(ValueError, test_each_elem_value_check)
+
+
+class TestTAPI(unittest.TestCase):
+    """Checks paddle.t against np.transpose on CPU, in static and dygraph mode."""
+
+    # Input shapes fed to paddle.t by test_out.
+    SHAPES = ([10], [10, 5], [1, 5])
+
+    def _check_static(self, shape):
+        # Builds a fresh program, runs paddle.t on a random float64 input of
+        # the given shape and compares the fetched result with np.transpose.
+        with fluid.program_guard(fluid.Program()):
+            data = fluid.data(shape=shape, dtype="float64", name="data")
+            data_t = paddle.t(data)
+            place = fluid.CPUPlace()
+            exe = fluid.Executor(place)
+            data_np = np.random.random(shape).astype("float64")
+            result, = exe.run(feed={"data": data_np}, fetch_list=[data_t])
+            expected_result = np.transpose(data_np)
+            self.assertTrue((result == expected_result).all())
+
+    def _check_dygraph(self, shape):
+        # Same comparison as _check_static, executed under the dygraph guard.
+        with fluid.dygraph.guard():
+            np_x = np.random.random(shape).astype("float64")
+            data = fluid.dygraph.to_variable(np_x)
+            z = paddle.t(data)
+            np_z = z.numpy()
+            z_expected = np.array(np.transpose(np_x))
+            self.assertTrue((np_z == z_expected).all())
+
+    def test_out(self):
+        # All static-graph cases run before any dygraph case.
+        for shape in self.SHAPES:
+            self._check_static(shape)
+        for shape in self.SHAPES:
+            self._check_dygraph(shape)
+
+    def test_errors(self):
+        with fluid.program_guard(fluid.Program()):
+            x = fluid.data(name='x', shape=[10, 5, 3], dtype='float64')
+
+            def test_x_dimension_check():
+                # a 3-D input is expected to be rejected by paddle.t
+                paddle.t(x)
+
+            self.assertRaises(ValueError, test_x_dimension_check)
+
+
+if __name__ == "__main__":
+    unittest.main()
-- 
GitLab