diff --git a/paddle/fluid/eager/eager_layout_auto_tune.h b/paddle/fluid/eager/eager_layout_auto_tune.h
index d95d464523c2f26cf4c5eb28d42aab8294e9a9a2..61a5218d36ce393cd7bba48c03b01f4c15c87a2f 100644
--- a/paddle/fluid/eager/eager_layout_auto_tune.h
+++ b/paddle/fluid/eager/eager_layout_auto_tune.h
@@ -150,7 +150,7 @@ inline std::shared_ptr EagerLayoutAutotune(
         op_name, tensors_vector, tensors_vector[0][0].layout());
   }
 
-  if (op_name == "transpose2" &&
+  if ((op_name == "transpose2" || op_name == "trans_layout") &&
       (tensors_vector[0][0].layout() == DesiredLayout())) {
     auto trans = std::make_shared(op_name);
     trans->SetAttr(attr,
diff --git a/paddle/fluid/eager/eager_layout_transformer.h b/paddle/fluid/eager/eager_layout_transformer.h
index ce80f6c948a7f23ac28ee9bf88ee1e56d31b0258..ddba37166cac268cbd954269e4cb1bdda4ce82c3 100644
--- a/paddle/fluid/eager/eager_layout_transformer.h
+++ b/paddle/fluid/eager/eager_layout_transformer.h
@@ -34,7 +34,7 @@ inline paddle::Tensor EagerTraceTransposeOp(const phi::DataLayout layout,
   } else {
     axis = {0, 1, 2, 3};
   }
-  auto out_tensor = transpose_ad_func(in, axis);
+  auto out_tensor = trans_layout_ad_func(in, axis);
   VLOG(4) << "AutoTune Transpose from " << in.layout() << " to " << layout;
   return out_tensor;
 }
diff --git a/paddle/phi/api/yaml/legacy_backward.yaml b/paddle/phi/api/yaml/legacy_backward.yaml
index 758a2ac3147e704ecfc614eac159223fdaa61727..37a3626861c087d10a8ed6dec0c8829b8887ca91 100755
--- a/paddle/phi/api/yaml/legacy_backward.yaml
+++ b/paddle/phi/api/yaml/legacy_backward.yaml
@@ -1019,6 +1019,15 @@
     composite : tile_grad(x, outgrad, repeat_times, x_grad)
   backward : tile_double_grad
 
+- backward_op : trans_layout_grad
+  forward : trans_layout (Tensor x, int[] perm) -> Tensor(out)
+  args : (Tensor x, Tensor out_grad, int[] perm)
+  output : Tensor(x_grad)
+  infer_meta :
+    func : TransLayoutGradInferMeta
+  kernel :
+    func : trans_layout_grad
+
 - backward_op : transpose_double_grad
   forward : transpose_grad (Tensor grad_out, int[] perm) -> Tensor(grad_x)
   args : (Tensor grad_x_grad, int[] perm)
diff --git a/paddle/phi/api/yaml/legacy_ops.yaml b/paddle/phi/api/yaml/legacy_ops.yaml
index 9b76564c83259de8c81a06fcba0e8b9a16d5278a..cf85f1b8bfe327131dab13d78fb23cee457ad617 100755
--- a/paddle/phi/api/yaml/legacy_ops.yaml
+++ b/paddle/phi/api/yaml/legacy_ops.yaml
@@ -1103,6 +1103,15 @@
     func : tile
   backward : tile_grad
 
+- op : trans_layout
+  args : (Tensor x, int[] perm)
+  output : Tensor
+  infer_meta :
+    func : TransposeInferMeta
+  kernel :
+    func : transpose
+  backward : trans_layout_grad
+
 - op : transpose
   args : (Tensor x, int[] perm)
   output : Tensor
diff --git a/paddle/phi/infermeta/backward.cc b/paddle/phi/infermeta/backward.cc
index bd9fae6bd155b59b90e10a3a5c2f69d0ddd0e0ef..cb4912401722173ffab766f5d716a853ac70991b 100644
--- a/paddle/phi/infermeta/backward.cc
+++ b/paddle/phi/infermeta/backward.cc
@@ -1069,6 +1069,33 @@ void StackGradInferMeta(const MetaTensor& out_grad,
   }
 }
 
+void TransposeGradInferMeta(const MetaTensor& x,
+                            const std::vector<int>& axis,
+                            MetaTensor* out) {
+  size_t x_rank = x.dims().size();
+  std::vector<int> formated_axis = axis;
+  for (size_t i = 0; i < axis.size(); i++) {
+    if (axis[i] < 0) {
+      formated_axis[i] = axis[i] + x_rank;
+    }
+  }
+
+  std::vector<int> reversed_axis(axis);
+  for (size_t i = 0; i < formated_axis.size(); i++) {
+    reversed_axis[formated_axis[i]] = i;
+  }
+
+  TransposeInferMeta(x, reversed_axis, out);
+}
+
+void TransLayoutGradInferMeta(const MetaTensor& x,
+                              const MetaTensor& out_grad,
+                              const std::vector<int>& axis,
+                              MetaTensor* x_grad) {
+  TransposeGradInferMeta(out_grad, axis, x_grad);
+  x_grad->set_layout(static_cast<DataLayout>(x.layout()));
+}
+
 void UniformRandomInplaceGradInferMeta(const MetaTensor& out_grad,
                                        float min,
                                        float max,
diff --git a/paddle/phi/infermeta/backward.h b/paddle/phi/infermeta/backward.h
index 8d3edf2a40a94410bbcdd830fd20627341a6548b..4dc995cb296f6833431154153dbabce9ed266228 100644
--- a/paddle/phi/infermeta/backward.h
+++ b/paddle/phi/infermeta/backward.h
@@ -413,6 +413,14 @@ void StackGradInferMeta(const MetaTensor& out_grad,
                         int axis,
                         std::vector<MetaTensor*> x_grad);
 
+void TransposeInferMeta(const MetaTensor& x,
+                        const std::vector<int>& axis,
+                        MetaTensor* out);
+
+void TransLayoutGradInferMeta(const MetaTensor& x,
+                              const MetaTensor& out_grad,
+                              const std::vector<int>& axis,
+                              MetaTensor* out);
 void UniformRandomInplaceGradInferMeta(const MetaTensor& out_grad,
                                        float min,
                                        float max,
diff --git a/paddle/phi/infermeta/unary.cc b/paddle/phi/infermeta/unary.cc
index 42b99dee7cb5025130bc70fef1fc797f52711aec..67ad639f648d8be7ad61dc727cd821644c18f565 100644
--- a/paddle/phi/infermeta/unary.cc
+++ b/paddle/phi/infermeta/unary.cc
@@ -4483,25 +4483,6 @@ void TransposeInferMeta(const MetaTensor& x,
   out->set_dtype(x.dtype());
 }
 
-void TransposeGradInferMeta(const MetaTensor& x,
-                            const std::vector<int>& axis,
-                            MetaTensor* out) {
-  size_t x_rank = x.dims().size();
-  std::vector<int> formated_axis = axis;
-  for (size_t i = 0; i < axis.size(); i++) {
-    if (axis[i] < 0) {
-      formated_axis[i] = axis[i] + x_rank;
-    }
-  }
-
-  std::vector<int> reversed_axis(axis);
-  for (size_t i = 0; i < formated_axis.size(); i++) {
-    reversed_axis[formated_axis[i]] = i;
-  }
-
-  TransposeInferMeta(x, reversed_axis, out);
-}
-
 void UnbindInferMeta(const MetaTensor& x,
                      int axis,
                      std::vector<MetaTensor*> outs) {
diff --git a/paddle/phi/kernels/cpu/transpose_grad_kernel.cc b/paddle/phi/kernels/cpu/transpose_grad_kernel.cc
index dee69222e6dc0810c53bc5bb4d9033365ecf0ffd..590d227d1e518cbe3b1cf36cd4736e48155b4d50 100644
--- a/paddle/phi/kernels/cpu/transpose_grad_kernel.cc
+++ b/paddle/phi/kernels/cpu/transpose_grad_kernel.cc
@@ -31,3 +31,16 @@ PD_REGISTER_KERNEL(transpose_grad,
                    phi::dtype::bfloat16,
                    phi::dtype::complex<float>,
                    phi::dtype::complex<double>) {}
+
+PD_REGISTER_KERNEL(trans_layout_grad,
+                   CPU,
+                   ALL_LAYOUT,
+                   phi::TransLayoutGradKernel,
+                   bool,
+                   float,
+                   double,
+                   int32_t,
+                   int64_t,
+                   phi::dtype::bfloat16,
+                   phi::dtype::complex<float>,
+                   phi::dtype::complex<double>) {}
diff --git a/paddle/phi/kernels/gpu/transpose_grad_kernel.cu b/paddle/phi/kernels/gpu/transpose_grad_kernel.cu
index e8ad456084ab147d9dea0f381bd5362be9bca633..35215ed5b96ed611746e7ea3d1dc3bb69e9ca80b 100644
--- a/paddle/phi/kernels/gpu/transpose_grad_kernel.cu
+++ b/paddle/phi/kernels/gpu/transpose_grad_kernel.cu
@@ -32,3 +32,17 @@ PD_REGISTER_KERNEL(transpose_grad,
                    phi::dtype::bfloat16,
                    phi::dtype::complex<float>,
                    phi::dtype::complex<double>) {}
+
+PD_REGISTER_KERNEL(trans_layout_grad,
+                   GPU,
+                   ALL_LAYOUT,
+                   phi::TransLayoutGradKernel,
+                   bool,
+                   float,
+                   double,
+                   int32_t,
+                   int64_t,
+                   phi::dtype::float16,
+                   phi::dtype::bfloat16,
+                   phi::dtype::complex<float>,
+                   phi::dtype::complex<double>) {}
diff --git a/paddle/phi/kernels/impl/transpose_grad_kernel_impl.h b/paddle/phi/kernels/impl/transpose_grad_kernel_impl.h
index 71f35fa1fdf0e697427b47a9773d968d206c9617..770fdcd20077eaa5dd937de3329a80e077517d81 100644
--- a/paddle/phi/kernels/impl/transpose_grad_kernel_impl.h
+++ b/paddle/phi/kernels/impl/transpose_grad_kernel_impl.h
@@ -42,4 +42,13 @@ void TransposeGradKernel(const Context& dev_ctx,
   TransposeKernel<T, Context>(dev_ctx, out_grad, reversed_axis, x_grad);
 }
 
+template <typename T, typename Context>
+void TransLayoutGradKernel(const Context& dev_ctx,
+                           const DenseTensor& x,
+                           const DenseTensor& out_grad,
+                           const std::vector<int>& axis,
+                           DenseTensor* x_grad) {
+  TransposeGradKernel<T, Context>(dev_ctx, out_grad, axis, x_grad);
+}
+
 }  // namespace phi
diff --git a/paddle/phi/kernels/transpose_grad_kernel.h b/paddle/phi/kernels/transpose_grad_kernel.h
index e224da81a25d0702e63af0bab31e1bc4afbfc607..be617eae143ad30283832e93fb455baaba42a68b 100644
--- a/paddle/phi/kernels/transpose_grad_kernel.h
+++ b/paddle/phi/kernels/transpose_grad_kernel.h
@@ -26,4 +26,16 @@ void TransposeGradKernel(const Context& dev_ctx,
                          const std::vector<int>& axis,
                          DenseTensor* x_grad);
 
+template <typename T, typename Context>
+void TransposeGradStrideKernel(const Context& dev_ctx,
+                               const DenseTensor& out_grad,
+                               const std::vector<int>& axis,
+                               DenseTensor* x_grad);
+template <typename T, typename Context>
+void TransLayoutGradKernel(const Context& dev_ctx,
+                           const DenseTensor& x,
+                           const DenseTensor& out_grad,
+                           const std::vector<int>& axis,
+                           DenseTensor* x_grad);
+
 }  // namespace phi
diff --git a/python/paddle/fluid/tests/unittests/test_trans_layout_op.py b/python/paddle/fluid/tests/unittests/test_trans_layout_op.py
new file mode 100644
index 0000000000000000000000000000000000000000..e6e1967d75e0020507e78d38a891a550ed1c24f3
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/test_trans_layout_op.py
@@ -0,0 +1,112 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import json
+import os
+import tempfile
+import unittest
+
+import numpy as np
+from eager_op_test import OpTest
+
+import paddle
+
+
+def transpose_layout(x, src_layout, dst_layout):
+    return x.transpose([0, 2, 3, 1])
+
+
+class TestTransferLayoutFP16Op(OpTest):
+    def setUp(self):
+        paddle.enable_static()
+        self.op_type = 'transfer_layout'
+        self.dtype = np.float16
+        x = np.random.random(size=[2, 5, 10, 10])
+        self.inputs = {'X': x.astype(self.dtype)}
+        self.outputs = {'Out': x.transpose([0, 2, 3, 1])}
+        self.attrs = {'src_layout': 0, 'dst_layout': 1}
+        self.python_api = transpose_layout
+
+    def test_check_output(self):
+        self.check_output()
+
+
+class LayoutAutoTune(unittest.TestCase):
+    def test_config(self):
+        paddle.fluid.core.enable_layout_autotune()
+        if self.use_autoune():
+            self.assertEqual(paddle.fluid.core.use_layout_autotune(), True)
+            paddle.fluid.core.disable_layout_autotune()
+            self.assertEqual(paddle.fluid.core.use_layout_autotune(), False)
+            self.use_autoune()
+
+    def setUp(self):
+        paddle.disable_static()
+        self.use_autoune()
+
+    def use_autoune(self):
+        if paddle.is_compiled_with_cuda():
+            paddle.incubate.autotune.set_config(
+                config={"layout": {"enable": True}}
+            )
+            return paddle.fluid.core.use_layout_autotune()
+        else:
+            config = {"layout": {"enable": False}}
+            tfile = tempfile.NamedTemporaryFile(mode="w+", delete=False)
+            json.dump(config, tfile)
+            tfile.close()
+            paddle.incubate.autotune.set_config(tfile.name)
+            os.remove(tfile.name)
+            return paddle.fluid.core.use_layout_autotune()
+
+    def test_flatten_op_transposer(self):
+        conv = paddle.nn.Conv2D(3, 8, (3, 3))
+        flatten = paddle.nn.Flatten(start_axis=1, stop_axis=2)
+        data = paddle.rand([1, 3, 16, 14])
+        with paddle.amp.auto_cast(level="O2"):
+            conv_out = conv(data)
+            # conv_out.shape = [1, 14, 12, 8] with NHWC
+            # layout tuner will transpose conv_out to
+            # [1, 8, 14, 12] with NCHW before the following flatten op
+            # because it flattens the C and H dimensions.
+            out = flatten(conv_out)
+
+        self.assertEqual(conv_out.shape, [1, 8, 14, 12])
+        self.assertEqual(out.shape, [1, 112, 12])
+
+    def test_argmax_op_transposer_keep_dims(self):
+        conv = paddle.nn.Conv2D(3, 8, (3, 3))
+        data = paddle.rand([1, 3, 16, 14])
+        with paddle.amp.auto_cast(level="O2"):
+            conv_out = conv(data)
+            # conv_out.shape = [1, 14, 12, 8] with NHWC
+            out = paddle.argmax(conv_out, axis=1, keepdim=True)
+        self.assertEqual(conv_out.shape, [1, 8, 14, 12])
+        self.assertEqual(out.shape, [1, 1, 14, 12])
+
+    def test_concat_op_transposer(self):
+        in1 = paddle.rand([1, 8, 14, 12])
+        conv = paddle.nn.Conv2D(3, 8, (3, 3))
+        data = paddle.rand([1, 3, 16, 14])
+        with paddle.amp.auto_cast(level="O2"):
+            conv_out = conv(data)
+            # conv_out.shape = [1, 14, 12, 8] with NHWC
+            out = paddle.concat(x=[conv_out, in1], axis=0)
+
+        self.assertEqual(conv_out.shape, [1, 8, 14, 12])
+        self.assertEqual(out.shape, [2, 8, 14, 12])
+
+
+if __name__ == '__main__':
+    unittest.main()
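
Editor's note (illustrative sketch, not part of the patch): the `TransposeGradInferMeta`/`TransLayoutGradKernel` path above relies on the fact that the gradient of a transpose is the transpose with the inverse permutation, which is what the `reversed_axis[formated_axis[i]] = i` loop computes. A minimal NumPy demonstration of that inversion, using the NHWC permutation exercised by the new test:

```python
import numpy as np

perm = [0, 2, 3, 1]              # NCHW -> NHWC, the axis order used by the layout tuner
inv = [0] * len(perm)
for i, p in enumerate(perm):     # mirrors reversed_axis[formated_axis[i]] = i
    inv[p] = i                   # inv == [0, 3, 1, 2], i.e. NHWC -> NCHW

x = np.random.rand(1, 3, 16, 14)
# transposing with perm and then with its inverse restores the original tensor,
# which is why the backward kernel can reuse TransposeGradKernel directly
assert np.array_equal(x.transpose(perm).transpose(inv), x)
```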