From 3dce9f0a760b2a7bbf1ad92ee36f60029832a0b8 Mon Sep 17 00:00:00 2001
From: niuliling123 <51102941+niuliling123@users.noreply.github.com>
Date: Mon, 15 May 2023 10:37:08 +0800
Subject: [PATCH] Transpose layout (#53351)

* update
* Update backward.h
* Update composite_backward_api.h
* Update tensor_utils.cc
* Update backward.cc
* update
* style
* update
* add ctest
* code style
---
 paddle/fluid/eager/eager_layout_auto_tune.h   |   2 +-
 paddle/fluid/eager/eager_layout_transformer.h |   2 +-
 paddle/phi/api/yaml/legacy_backward.yaml      |   9 ++
 paddle/phi/api/yaml/legacy_ops.yaml           |   9 ++
 paddle/phi/infermeta/backward.cc              |  27 +++++
 paddle/phi/infermeta/backward.h               |   8 ++
 paddle/phi/infermeta/unary.cc                 |  19 ---
 .../phi/kernels/cpu/transpose_grad_kernel.cc  |  13 ++
 .../phi/kernels/gpu/transpose_grad_kernel.cu  |  14 +++
 .../kernels/impl/transpose_grad_kernel_impl.h |   9 ++
 paddle/phi/kernels/transpose_grad_kernel.h    |  12 ++
 .../tests/unittests/test_trans_layout_op.py   | 112 ++++++++++++++++++
 12 files changed, 215 insertions(+), 21 deletions(-)
 create mode 100644 python/paddle/fluid/tests/unittests/test_trans_layout_op.py

diff --git a/paddle/fluid/eager/eager_layout_auto_tune.h b/paddle/fluid/eager/eager_layout_auto_tune.h
index d95d464523c..61a5218d36c 100644
--- a/paddle/fluid/eager/eager_layout_auto_tune.h
+++ b/paddle/fluid/eager/eager_layout_auto_tune.h
@@ -150,7 +150,7 @@ inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
         op_name, tensors_vector, tensors_vector[0][0].layout());
   }
 
-  if (op_name == "transpose2" &&
+  if ((op_name == "transpose2" || op_name == "trans_layout") &&
       (tensors_vector[0][0].layout() == DesiredLayout())) {
     auto trans = std::make_shared<EagerTransposeOpTransformer>(op_name);
     trans->SetAttr(attr,
diff --git a/paddle/fluid/eager/eager_layout_transformer.h b/paddle/fluid/eager/eager_layout_transformer.h
index ce80f6c948a..ddba37166ca 100644
--- a/paddle/fluid/eager/eager_layout_transformer.h
+++ b/paddle/fluid/eager/eager_layout_transformer.h
@@ -34,7 +34,7 @@ inline paddle::Tensor EagerTraceTransposeOp(const phi::DataLayout layout,
   } else {
     axis = {0, 1, 2, 3};
   }
-  auto out_tensor = transpose_ad_func(in, axis);
+  auto out_tensor = trans_layout_ad_func(in, axis);
   VLOG(4) << "AutoTune Transpose from " << in.layout() << " to " << layout;
   return out_tensor;
 }
diff --git a/paddle/phi/api/yaml/legacy_backward.yaml b/paddle/phi/api/yaml/legacy_backward.yaml
index 758a2ac3147..37a3626861c 100755
--- a/paddle/phi/api/yaml/legacy_backward.yaml
+++ b/paddle/phi/api/yaml/legacy_backward.yaml
@@ -1019,6 +1019,15 @@
   composite : tile_grad(x, outgrad, repeat_times, x_grad)
   backward : tile_double_grad
 
+- backward_op : trans_layout_grad
+  forward : trans_layout (Tensor x, int[] perm) -> Tensor(out)
+  args : (Tensor x, Tensor out_grad, int[] perm)
+  output : Tensor(x_grad)
+  infer_meta :
+    func : TransLayoutGradInferMeta
+  kernel :
+    func : trans_layout_grad
+
 - backward_op : transpose_double_grad
   forward : transpose_grad (Tensor grad_out, int[] perm) -> Tensor(grad_x)
   args : (Tensor grad_x_grad, int[] perm)
diff --git a/paddle/phi/api/yaml/legacy_ops.yaml b/paddle/phi/api/yaml/legacy_ops.yaml
index 9b76564c832..cf85f1b8bfe 100755
--- a/paddle/phi/api/yaml/legacy_ops.yaml
+++ b/paddle/phi/api/yaml/legacy_ops.yaml
@@ -1103,6 +1103,15 @@
     func : tile
   backward : tile_grad
 
+- op : trans_layout
+  args : (Tensor x, int[] perm)
+  output : Tensor
+  infer_meta :
+    func : TransposeInferMeta
+  kernel :
+    func : transpose
+  backward : trans_layout_grad
+
 - op : transpose
   args : (Tensor x, int[] perm)
   output : Tensor
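For orientation before the infermeta and kernel hunks that follow: the two YAML entries above let trans_layout reuse the existing transpose forward path (TransposeInferMeta plus the transpose kernel) while routing its gradient through the new trans_layout_grad. A rough dygraph sketch of what that wiring implies is below; paddle.transpose is the stable public API, while the paddle._C_ops.trans_layout binding name is only an assumption inferred from the op name (legacy ops are normally code-generated into paddle._C_ops), so treat it as illustrative and verify it against an actual build:

    import paddle

    x = paddle.rand([2, 5, 10, 10])
    # Reference result through the stable public API.
    ref = paddle.transpose(x, perm=[0, 2, 3, 1])
    # Hypothetical generated binding for the new op; forward semantics match transpose.
    out = paddle._C_ops.trans_layout(x, [0, 2, 3, 1])
    assert out.shape == ref.shape == [2, 10, 10, 5]

The difference only shows up in the backward pass, where trans_layout_grad additionally restores the input's layout on the gradient (see the TransLayoutGradInferMeta hunk below).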
diff --git a/paddle/phi/infermeta/backward.cc b/paddle/phi/infermeta/backward.cc
index bd9fae6bd15..cb491240172 100644
--- a/paddle/phi/infermeta/backward.cc
+++ b/paddle/phi/infermeta/backward.cc
@@ -1069,6 +1069,33 @@ void StackGradInferMeta(const MetaTensor& out_grad,
   }
 }
 
+void TransposeGradInferMeta(const MetaTensor& x,
+                            const std::vector<int>& axis,
+                            MetaTensor* out) {
+  size_t x_rank = x.dims().size();
+  std::vector<int> formated_axis = axis;
+  for (size_t i = 0; i < axis.size(); i++) {
+    if (axis[i] < 0) {
+      formated_axis[i] = axis[i] + x_rank;
+    }
+  }
+
+  std::vector<int> reversed_axis(axis);
+  for (size_t i = 0; i < formated_axis.size(); i++) {
+    reversed_axis[formated_axis[i]] = i;
+  }
+
+  TransposeInferMeta(x, reversed_axis, out);
+}
+
+void TransLayoutGradInferMeta(const MetaTensor& x,
+                              const MetaTensor& out_grad,
+                              const std::vector<int>& axis,
+                              MetaTensor* x_grad) {
+  TransposeGradInferMeta(out_grad, axis, x_grad);
+  x_grad->set_layout(static_cast<DataLayout>(x.layout()));
+}
+
 void UniformRandomInplaceGradInferMeta(const MetaTensor& out_grad,
                                        float min,
                                        float max,
diff --git a/paddle/phi/infermeta/backward.h b/paddle/phi/infermeta/backward.h
index 8d3edf2a40a..4dc995cb296 100644
--- a/paddle/phi/infermeta/backward.h
+++ b/paddle/phi/infermeta/backward.h
@@ -413,6 +413,14 @@ void StackGradInferMeta(const MetaTensor& out_grad,
                         int axis,
                         std::vector<MetaTensor*> x_grad);
 
+void TransposeInferMeta(const MetaTensor& x,
+                        const std::vector<int>& axis,
+                        MetaTensor* out);
+
+void TransLayoutGradInferMeta(const MetaTensor& x,
+                              const MetaTensor& out_grad,
+                              const std::vector<int>& axis,
+                              MetaTensor* out);
 void UniformRandomInplaceGradInferMeta(const MetaTensor& out_grad,
                                        float min,
                                        float max,
diff --git a/paddle/phi/infermeta/unary.cc b/paddle/phi/infermeta/unary.cc
index 42b99dee7cb..67ad639f648 100644
--- a/paddle/phi/infermeta/unary.cc
+++ b/paddle/phi/infermeta/unary.cc
@@ -4483,25 +4483,6 @@ void TransposeInferMeta(const MetaTensor& x,
   out->set_dtype(x.dtype());
 }
 
-void TransposeGradInferMeta(const MetaTensor& x,
-                            const std::vector<int>& axis,
-                            MetaTensor* out) {
-  size_t x_rank = x.dims().size();
-  std::vector<int> formated_axis = axis;
-  for (size_t i = 0; i < axis.size(); i++) {
-    if (axis[i] < 0) {
-      formated_axis[i] = axis[i] + x_rank;
-    }
-  }
-
-  std::vector<int> reversed_axis(axis);
-  for (size_t i = 0; i < formated_axis.size(); i++) {
-    reversed_axis[formated_axis[i]] = i;
-  }
-
-  TransposeInferMeta(x, reversed_axis, out);
-}
-
 void UnbindInferMeta(const MetaTensor& x,
                      int axis,
                      std::vector<MetaTensor*> outs) {
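The two infer-meta functions above are compact enough to mirror in Python, which may make the hunks easier to read: TransposeGradInferMeta normalizes negative axes, inverts the permutation, and defers to TransposeInferMeta, while TransLayoutGradInferMeta does the same and then pins the gradient's layout back to the input's layout. The MetaTensorStub class below is a purely illustrative stand-in, not part of any Paddle API:

    class MetaTensorStub:
        def __init__(self, shape, layout):
            self.shape, self.layout = shape, layout

    def transpose_grad_infer_meta(out_grad, axis):
        # Mirror of TransposeGradInferMeta: normalize negative axes, then invert the permutation.
        rank = len(out_grad.shape)
        formated_axis = [a + rank if a < 0 else a for a in axis]
        reversed_axis = [0] * len(axis)
        for i, a in enumerate(formated_axis):
            reversed_axis[a] = i
        # Transposing out_grad by the inverse permutation recovers the forward input's shape.
        return MetaTensorStub([out_grad.shape[a] for a in reversed_axis], out_grad.layout)

    def trans_layout_grad_infer_meta(x, out_grad, axis):
        x_grad = transpose_grad_infer_meta(out_grad, axis)
        # The only extra step in TransLayoutGradInferMeta: keep the input tensor's layout.
        x_grad.layout = x.layout
        return x_grad

    x = MetaTensorStub([2, 5, 10, 10], "NCHW")
    out_grad = MetaTensorStub([2, 10, 10, 5], "NHWC")
    g = trans_layout_grad_infer_meta(x, out_grad, [0, 2, 3, 1])
    print(g.shape, g.layout)  # [2, 5, 10, 10] NCHW

The kernel side registered in the next files is even simpler: TransLayoutGradKernel just forwards to TransposeGradKernel with the same axis.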
diff --git a/paddle/phi/kernels/cpu/transpose_grad_kernel.cc b/paddle/phi/kernels/cpu/transpose_grad_kernel.cc
index dee69222e6d..590d227d1e5 100644
--- a/paddle/phi/kernels/cpu/transpose_grad_kernel.cc
+++ b/paddle/phi/kernels/cpu/transpose_grad_kernel.cc
@@ -31,3 +31,16 @@ PD_REGISTER_KERNEL(transpose_grad,
                    phi::dtype::bfloat16,
                    phi::dtype::complex<float>,
                    phi::dtype::complex<double>) {}
+
+PD_REGISTER_KERNEL(trans_layout_grad,
+                   CPU,
+                   ALL_LAYOUT,
+                   phi::TransLayoutGradKernel,
+                   bool,
+                   float,
+                   double,
+                   int32_t,
+                   int64_t,
+                   phi::dtype::bfloat16,
+                   phi::dtype::complex<float>,
+                   phi::dtype::complex<double>) {}
diff --git a/paddle/phi/kernels/gpu/transpose_grad_kernel.cu b/paddle/phi/kernels/gpu/transpose_grad_kernel.cu
index e8ad456084a..35215ed5b96 100644
--- a/paddle/phi/kernels/gpu/transpose_grad_kernel.cu
+++ b/paddle/phi/kernels/gpu/transpose_grad_kernel.cu
@@ -32,3 +32,17 @@ PD_REGISTER_KERNEL(transpose_grad,
                    phi::dtype::bfloat16,
                    phi::dtype::complex<float>,
                    phi::dtype::complex<double>) {}
+
+PD_REGISTER_KERNEL(trans_layout_grad,
+                   GPU,
+                   ALL_LAYOUT,
+                   phi::TransLayoutGradKernel,
+                   bool,
+                   float,
+                   double,
+                   int32_t,
+                   int64_t,
+                   phi::dtype::float16,
+                   phi::dtype::bfloat16,
+                   phi::dtype::complex<float>,
+                   phi::dtype::complex<double>) {}
diff --git a/paddle/phi/kernels/impl/transpose_grad_kernel_impl.h b/paddle/phi/kernels/impl/transpose_grad_kernel_impl.h
index 71f35fa1fdf..770fdcd2007 100644
--- a/paddle/phi/kernels/impl/transpose_grad_kernel_impl.h
+++ b/paddle/phi/kernels/impl/transpose_grad_kernel_impl.h
@@ -42,4 +42,13 @@ void TransposeGradKernel(const Context& dev_ctx,
   TransposeKernel<T, Context>(dev_ctx, out_grad, reversed_axis, x_grad);
 }
 
+template <typename T, typename Context>
+void TransLayoutGradKernel(const Context& dev_ctx,
+                           const DenseTensor& x,
+                           const DenseTensor& out_grad,
+                           const std::vector<int>& axis,
+                           DenseTensor* x_grad) {
+  TransposeGradKernel<T, Context>(dev_ctx, out_grad, axis, x_grad);
+}
+
 }  // namespace phi
diff --git a/paddle/phi/kernels/transpose_grad_kernel.h b/paddle/phi/kernels/transpose_grad_kernel.h
index e224da81a25..be617eae143 100644
--- a/paddle/phi/kernels/transpose_grad_kernel.h
+++ b/paddle/phi/kernels/transpose_grad_kernel.h
@@ -26,4 +26,16 @@ void TransposeGradKernel(const Context& dev_ctx,
                          const std::vector<int>& axis,
                          DenseTensor* x_grad);
 
+template <typename T, typename Context>
+void TransposeGradStrideKernel(const Context& dev_ctx,
+                               const DenseTensor& out_grad,
+                               const std::vector<int>& axis,
+                               DenseTensor* x_grad);
+template <typename T, typename Context>
+void TransLayoutGradKernel(const Context& dev_ctx,
+                           const DenseTensor& x,
+                           const DenseTensor& out_grad,
+                           const std::vector<int>& axis,
+                           DenseTensor* x_grad);
+
 }  // namespace phi
diff --git a/python/paddle/fluid/tests/unittests/test_trans_layout_op.py b/python/paddle/fluid/tests/unittests/test_trans_layout_op.py
new file mode 100644
index 00000000000..e6e1967d75e
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/test_trans_layout_op.py
@@ -0,0 +1,112 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import json
+import os
+import tempfile
+import unittest
+
+import numpy as np
+from eager_op_test import OpTest
+
+import paddle
+
+
+def transpose_layout(x, src_layout, dst_layout):
+    return x.transpose([0, 2, 3, 1])
+
+
+class TestTransferLayoutFP16Op(OpTest):
+    def setUp(self):
+        paddle.enable_static()
+        self.op_type = 'transfer_layout'
+        self.dtype = np.float16
+        x = np.random.random(size=[2, 5, 10, 10])
+        self.inputs = {'X': x.astype(self.dtype)}
+        self.outputs = {'Out': x.transpose([0, 2, 3, 1])}
+        self.attrs = {'src_layout': 0, 'dst_layout': 1}
+        self.python_api = transpose_layout
+
+    def test_check_output(self):
+        self.check_output()
+
+
+class LayoutAutoTune(unittest.TestCase):
+    def test_config(self):
+        paddle.fluid.core.enable_layout_autotune()
+        if self.use_autoune():
+            self.assertEqual(paddle.fluid.core.use_layout_autotune(), True)
+            paddle.fluid.core.disable_layout_autotune()
+            self.assertEqual(paddle.fluid.core.use_layout_autotune(), False)
+            self.use_autoune()
+
+    def setUp(self):
+        paddle.disable_static()
+        self.use_autoune()
+
+    def use_autoune(self):
+        if paddle.is_compiled_with_cuda():
+            paddle.incubate.autotune.set_config(
+                config={"layout": {"enable": True}}
+            )
+            return paddle.fluid.core.use_layout_autotune()
+        else:
+            config = {"layout": {"enable": False}}
+            tfile = tempfile.NamedTemporaryFile(mode="w+", delete=False)
+            json.dump(config, tfile)
+            tfile.close()
+            paddle.incubate.autotune.set_config(tfile.name)
+            os.remove(tfile.name)
+            return paddle.fluid.core.use_layout_autotune()
+
+    def test_flatten_op_transposer(self):
+        conv = paddle.nn.Conv2D(3, 8, (3, 3))
+        flatten = paddle.nn.Flatten(start_axis=1, stop_axis=2)
+        data = paddle.rand([1, 3, 16, 14])
+        with paddle.amp.auto_cast(level="O2"):
+            conv_out = conv(data)
+            # conv_out.shape = [1, 14, 12, 8] with NHWC
+            # layout tuner will transpose conv_out to
+            # [1, 8, 14, 12] with NCHW before the following flatten op
+            # because it flattens the C and H dimensions.
+            out = flatten(conv_out)
+
+        self.assertEqual(conv_out.shape, [1, 8, 14, 12])
+        self.assertEqual(out.shape, [1, 112, 12])
+
+    def test_argmax_op_transposer_keep_dims(self):
+        conv = paddle.nn.Conv2D(3, 8, (3, 3))
+        data = paddle.rand([1, 3, 16, 14])
+        with paddle.amp.auto_cast(level="O2"):
+            conv_out = conv(data)
+            # conv_out.shape = [1, 14, 12, 8] with NHWC
+            out = paddle.argmax(conv_out, axis=1, keepdim=True)
+        self.assertEqual(conv_out.shape, [1, 8, 14, 12])
+        self.assertEqual(out.shape, [1, 1, 14, 12])
+
+    def test_concat_op_transposer(self):
+        in1 = paddle.rand([1, 8, 14, 12])
+        conv = paddle.nn.Conv2D(3, 8, (3, 3))
+        data = paddle.rand([1, 3, 16, 14])
+        with paddle.amp.auto_cast(level="O2"):
+            conv_out = conv(data)
+            # conv_out.shape = [1, 14, 12, 8] with NHWC
+            out = paddle.concat(x=[conv_out, in1], axis=0)
+
+        self.assertEqual(conv_out.shape, [1, 8, 14, 12])
+        self.assertEqual(out.shape, [2, 8, 14, 12])
+
+
+if __name__ == '__main__':
+    unittest.main()
-- 
GitLab
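As a usage-level companion to the new unit test, the snippet below sketches the scenario its flatten case exercises: on a CUDA build with layout autotune enabled, the conv runs in NHWC under AMP O2, shapes keep being reported in NCHW, and flattening axes 1..2 merges C and H (8 * 14 = 112). All of the calls appear in the test above; this is a sketch of the expected behavior, not an additional test:

    import paddle

    if paddle.is_compiled_with_cuda():
        # Turn on layout autotune, as the unittest's use_autoune() helper does.
        paddle.incubate.autotune.set_config(config={"layout": {"enable": True}})

        conv = paddle.nn.Conv2D(3, 8, (3, 3))
        flatten = paddle.nn.Flatten(start_axis=1, stop_axis=2)
        data = paddle.rand([1, 3, 16, 14])
        with paddle.amp.auto_cast(level="O2"):
            conv_out = conv(data)    # computed in NHWC internally, reported as NCHW
            out = flatten(conv_out)  # C and H are merged: 8 * 14 = 112

        print(conv_out.shape)  # [1, 8, 14, 12]
        print(out.shape)       # [1, 112, 12]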