Unverified · Commit 3dce9f0a authored by niuliling123, committed by GitHub

Transpose layout (#53351)

* update

* Update backward.h

* Update composite_backward_api.h

* Update tensor_utils.cc

* Update backward.cc

* update

* style

* update

* add ctest

* code style
Parent 34122e3e
@@ -150,7 +150,7 @@ inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
         op_name, tensors_vector, tensors_vector[0][0].layout());
   }
-  if (op_name == "transpose2" &&
+  if ((op_name == "transpose2" || op_name == "trans_layout") &&
       (tensors_vector[0][0].layout() == DesiredLayout())) {
     auto trans = std::make_shared<EagerTransposeOpTransformer>(op_name);
     trans->SetAttr(attr,
...
@@ -34,7 +34,7 @@ inline paddle::Tensor EagerTraceTransposeOp(const phi::DataLayout layout,
   } else {
     axis = {0, 1, 2, 3};
   }
-  auto out_tensor = transpose_ad_func(in, axis);
+  auto out_tensor = trans_layout_ad_func(in, axis);
   VLOG(4) << "AutoTune Transpose from " << in.layout() << " to " << layout;
   return out_tensor;
 }
...
@@ -1019,6 +1019,15 @@
     composite : tile_grad(x, outgrad, repeat_times, x_grad)
   backward : tile_double_grad

+- backward_op : trans_layout_grad
+  forward : trans_layout (Tensor x, int[] perm) -> Tensor(out)
+  args : (Tensor x, Tensor out_grad, int[] perm)
+  output : Tensor(x_grad)
+  infer_meta :
+    func : TransLayoutGradInferMeta
+  kernel :
+    func : trans_layout_grad
+
 - backward_op : transpose_double_grad
   forward : transpose_grad (Tensor grad_out, int[] perm) -> Tensor(grad_x)
   args : (Tensor grad_x_grad, int[] perm)
...
@@ -1103,6 +1103,15 @@
     func : tile
   backward : tile_grad

+- op : trans_layout
+  args : (Tensor x, int[] perm)
+  output : Tensor
+  infer_meta :
+    func : TransposeInferMeta
+  kernel :
+    func : transpose
+  backward : trans_layout_grad
+
 - op : transpose
   args : (Tensor x, int[] perm)
   output : Tensor
...
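Since the new trans_layout op reuses TransposeInferMeta and the existing transpose kernel, its forward computation is an ordinary axis permutation; only the layout bookkeeping in the backward pass differs. A minimal NumPy sketch of that forward behavior (illustrative only, not the generated Paddle API):

import numpy as np

# The op permutes the data axes according to `perm`; NCHW -> NHWC uses [0, 2, 3, 1].
x = np.random.rand(2, 3, 4, 5)      # NCHW-shaped input
perm = [0, 2, 3, 1]
out = np.transpose(x, perm)         # NHWC-shaped output
assert out.shape == (2, 4, 5, 3)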
@@ -1069,6 +1069,33 @@ void StackGradInferMeta(const MetaTensor& out_grad,
   }
 }

+void TransposeGradInferMeta(const MetaTensor& x,
+                            const std::vector<int>& axis,
+                            MetaTensor* out) {
+  size_t x_rank = x.dims().size();
+  std::vector<int> formated_axis = axis;
+  for (size_t i = 0; i < axis.size(); i++) {
+    if (axis[i] < 0) {
+      formated_axis[i] = axis[i] + x_rank;
+    }
+  }
+
+  std::vector<int> reversed_axis(axis);
+  for (size_t i = 0; i < formated_axis.size(); i++) {
+    reversed_axis[formated_axis[i]] = i;
+  }
+
+  TransposeInferMeta(x, reversed_axis, out);
+}
+
+void TransLayoutGradInferMeta(const MetaTensor& x,
+                              const MetaTensor& out_grad,
+                              const std::vector<int>& axis,
+                              MetaTensor* x_grad) {
+  TransposeGradInferMeta(out_grad, axis, x_grad);
+  x_grad->set_layout(static_cast<DataLayout>(x.layout()));
+}
+
 void UniformRandomInplaceGradInferMeta(const MetaTensor& out_grad,
                                        float min,
                                        float max,
...
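TransposeGradInferMeta infers the gradient's shape by transposing back with the inverse permutation (reversed_axis) after normalizing negative axes. A small Python sketch of that inversion, mirroring the two loops above (inverse_perm is an illustrative helper name, not Paddle API):

def inverse_perm(axis, rank):
    # Normalize negative axes, then invert the permutation: if the forward op
    # maps dimension i to position axis[i], the inverse satisfies
    # reversed_axis[axis[i]] = i.
    formatted = [a + rank if a < 0 else a for a in axis]
    reversed_axis = [0] * len(formatted)
    for i, a in enumerate(formatted):
        reversed_axis[a] = i
    return reversed_axis

assert inverse_perm([0, 2, 3, 1], 4) == [0, 3, 1, 2]
assert inverse_perm([0, -1, 1, 2], 4) == [0, 2, 3, 1]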
@@ -413,6 +413,14 @@ void StackGradInferMeta(const MetaTensor& out_grad,
                         int axis,
                         std::vector<MetaTensor*> x_grad);

+void TransposeGradInferMeta(const MetaTensor& x,
+                            const std::vector<int>& axis,
+                            MetaTensor* out);
+
+void TransLayoutGradInferMeta(const MetaTensor& x,
+                              const MetaTensor& out_grad,
+                              const std::vector<int>& axis,
+                              MetaTensor* out);
+
 void UniformRandomInplaceGradInferMeta(const MetaTensor& out_grad,
                                        float min,
                                        float max,
...
@@ -4483,25 +4483,6 @@ void TransposeInferMeta(const MetaTensor& x,
   out->set_dtype(x.dtype());
 }

-void TransposeGradInferMeta(const MetaTensor& x,
-                            const std::vector<int>& axis,
-                            MetaTensor* out) {
-  size_t x_rank = x.dims().size();
-  std::vector<int> formated_axis = axis;
-  for (size_t i = 0; i < axis.size(); i++) {
-    if (axis[i] < 0) {
-      formated_axis[i] = axis[i] + x_rank;
-    }
-  }
-
-  std::vector<int> reversed_axis(axis);
-  for (size_t i = 0; i < formated_axis.size(); i++) {
-    reversed_axis[formated_axis[i]] = i;
-  }
-
-  TransposeInferMeta(x, reversed_axis, out);
-}
-
 void UnbindInferMeta(const MetaTensor& x,
                      int axis,
                      std::vector<MetaTensor*> outs) {
...
@@ -31,3 +31,16 @@ PD_REGISTER_KERNEL(transpose_grad,
                    phi::dtype::bfloat16,
                    phi::dtype::complex<float>,
                    phi::dtype::complex<double>) {}
+
+PD_REGISTER_KERNEL(trans_layout_grad,
+                   CPU,
+                   ALL_LAYOUT,
+                   phi::TransLayoutGradKernel,
+                   bool,
+                   float,
+                   double,
+                   int32_t,
+                   int64_t,
+                   phi::dtype::bfloat16,
+                   phi::dtype::complex<float>,
+                   phi::dtype::complex<double>) {}
@@ -32,3 +32,17 @@ PD_REGISTER_KERNEL(transpose_grad,
                    phi::dtype::bfloat16,
                    phi::dtype::complex<float>,
                    phi::dtype::complex<double>) {}
+
+PD_REGISTER_KERNEL(trans_layout_grad,
+                   GPU,
+                   ALL_LAYOUT,
+                   phi::TransLayoutGradKernel,
+                   bool,
+                   float,
+                   double,
+                   int32_t,
+                   int64_t,
+                   phi::dtype::float16,
+                   phi::dtype::bfloat16,
+                   phi::dtype::complex<float>,
+                   phi::dtype::complex<double>) {}
@@ -42,4 +42,13 @@ void TransposeGradKernel(const Context& dev_ctx,
   TransposeKernel<T, Context>(dev_ctx, out_grad, reversed_axis, x_grad);
 }

+template <typename T, typename Context>
+void TransLayoutGradKernel(const Context& dev_ctx,
+                           const DenseTensor& x,
+                           const DenseTensor& out_grad,
+                           const std::vector<int>& axis,
+                           DenseTensor* x_grad) {
+  TransposeGradKernel<T, Context>(dev_ctx, out_grad, axis, x_grad);
+}
+
 }  // namespace phi
@@ -26,4 +26,16 @@ void TransposeGradKernel(const Context& dev_ctx,
                          const std::vector<int>& axis,
                          DenseTensor* x_grad);

+template <typename Context>
+void TransposeGradStrideKernel(const Context& dev_ctx,
+                               const DenseTensor& out_grad,
+                               const std::vector<int>& axis,
+                               DenseTensor* x_grad);
+
+template <typename T, typename Context>
+void TransLayoutGradKernel(const Context& dev_ctx,
+                           const DenseTensor& x,
+                           const DenseTensor& out_grad,
+                           const std::vector<int>& axis,
+                           DenseTensor* x_grad);
+
 }  // namespace phi
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import json
import os
import tempfile
import unittest

import numpy as np
from eager_op_test import OpTest

import paddle


def transpose_layout(x, src_layout, dst_layout):
    return x.transpose([0, 2, 3, 1])


class TestTransferLayoutFP16Op(OpTest):
    def setUp(self):
        paddle.enable_static()
        self.op_type = 'transfer_layout'
        self.dtype = np.float16
        x = np.random.random(size=[2, 5, 10, 10])
        self.inputs = {'X': x.astype(self.dtype)}
        self.outputs = {'Out': x.transpose([0, 2, 3, 1])}
        self.attrs = {'src_layout': 0, 'dst_layout': 1}
        self.python_api = transpose_layout

    def test_check_output(self):
        self.check_output()


class LayoutAutoTune(unittest.TestCase):
    def test_config(self):
        paddle.fluid.core.enable_layout_autotune()
        if self.use_autoune():
            self.assertEqual(paddle.fluid.core.use_layout_autotune(), True)
            paddle.fluid.core.disable_layout_autotune()
            self.assertEqual(paddle.fluid.core.use_layout_autotune(), False)
            self.use_autoune()

    def setUp(self):
        paddle.disable_static()
        self.use_autoune()

    def use_autoune(self):
        if paddle.is_compiled_with_cuda():
            paddle.incubate.autotune.set_config(
                config={"layout": {"enable": True}}
            )
            return paddle.fluid.core.use_layout_autotune()
        else:
            config = {"layout": {"enable": False}}
            tfile = tempfile.NamedTemporaryFile(mode="w+", delete=False)
            json.dump(config, tfile)
            tfile.close()
            paddle.incubate.autotune.set_config(tfile.name)
            os.remove(tfile.name)
            return paddle.fluid.core.use_layout_autotune()

    def test_flatten_op_transposer(self):
        conv = paddle.nn.Conv2D(3, 8, (3, 3))
        flatten = paddle.nn.Flatten(start_axis=1, stop_axis=2)
        data = paddle.rand([1, 3, 16, 14])
        with paddle.amp.auto_cast(level="O2"):
            conv_out = conv(data)
            # conv_out.shape = [1, 14, 12, 8] with NHWC
            # layout tuner will transpose conv_out to
            # [1, 8, 14, 12] with NCHW before the following flatten op
            # because it flattens the C and H dimensions.
            out = flatten(conv_out)

        self.assertEqual(conv_out.shape, [1, 8, 14, 12])
        self.assertEqual(out.shape, [1, 112, 12])

    def test_argmax_op_transposer_keep_dims(self):
        conv = paddle.nn.Conv2D(3, 8, (3, 3))
        data = paddle.rand([1, 3, 16, 14])
        with paddle.amp.auto_cast(level="O2"):
            conv_out = conv(data)
            # conv_out.shape = [1, 14, 12, 8] with NHWC
            out = paddle.argmax(conv_out, axis=1, keepdim=True)

        self.assertEqual(conv_out.shape, [1, 8, 14, 12])
        self.assertEqual(out.shape, [1, 1, 14, 12])

    def test_concat_op_transposer(self):
        in1 = paddle.rand([1, 8, 14, 12])
        conv = paddle.nn.Conv2D(3, 8, (3, 3))
        data = paddle.rand([1, 3, 16, 14])
        with paddle.amp.auto_cast(level="O2"):
            conv_out = conv(data)
            # conv_out.shape = [1, 14, 12, 8] with NHWC
            out = paddle.concat(x=[conv_out, in1], axis=0)

        self.assertEqual(conv_out.shape, [1, 8, 14, 12])
        self.assertEqual(out.shape, [2, 8, 14, 12])


if __name__ == '__main__':
    unittest.main()
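For reference, the flatten assertion above follows from simple shape bookkeeping: merging axes 1..2 of an NCHW tensor of shape [1, 8, 14, 12] yields 8 * 14 = 112. A tiny NumPy check of that arithmetic (illustration only, unrelated to the test harness):

import numpy as np

x = np.zeros((1, 8, 14, 12))
flat = x.reshape(1, 8 * 14, 12)
assert flat.shape == (1, 112, 12)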