Commit d7d9807e (unverified)
Repository: Crayon鑫 / Paddle, forked from PaddlePaddle / Paddle
Authored by niuliling123 on Sep 05, 2022; committed by GitHub on Sep 05, 2022
Add eager layout autotune (#45409)
Parent: cfaee812
Showing 12 changed files with 997 additions and 31 deletions (+997 -31):
paddle/fluid/eager/api/manual/eager_manual/forwards/conv2d_fwd_function.cc  +32 -0
paddle/fluid/eager/auto_code_generator/generator/eager_gen.py  +120 -11
paddle/fluid/eager/eager_layout_auto_tune.h  +276 -0 (new file)
paddle/fluid/eager/eager_layout_transformer.h  +493 -0 (new file)
paddle/fluid/imperative/layout_autotune.cc  +8 -6
paddle/fluid/imperative/layout_autotune.h  +9 -3
paddle/fluid/imperative/layout_transformer.h  +8 -1
paddle/fluid/pybind/eager_properties.cc  +21 -0
paddle/fluid/pybind/imperative.cc  +9 -0
paddle/phi/api/lib/data_transform.cc  +19 -7
python/paddle/fluid/tests/unittests/test_layout_autotune.py  +0 -3
python/paddle/nn/functional/conv.py  +2 -0
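Note: layout autotune lets the eager (dygraph) executor transpose layout-sensitive ops between NCHW and NHWC at run time, preferring NHWC for FP16 conv2d on Tensor Core GPUs and NCHW for FP32. Below is a minimal sketch of driving it from Python; it assumes the pre-existing paddle.incubate.autotune.set_config switch and purely illustrative shapes, and is not part of this diff.

# Sketch (not part of this commit): enable layout autotune, then run an FP16
# conv so the first conv2d call can pick the desired layout.
import paddle

paddle.incubate.autotune.set_config({"layout": {"enable": True}})  # assumed API

conv = paddle.nn.Conv2D(in_channels=3, out_channels=8, kernel_size=3)
x = paddle.rand([4, 3, 32, 32])

with paddle.amp.auto_cast(level="O2"):   # FP16 makes NHWC the desired layout
    y = conv(x)                          # the first conv2d sets desired/default layouts

print(y.shape)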
paddle/fluid/eager/api/manual/eager_manual/forwards/conv2d_fwd_function.cc
...
...
@@ -17,6 +17,7 @@
#include "paddle/fluid/eager/api/manual/eager_manual/nodes/nodes.h"
#include "paddle/fluid/eager/api/utils/global_utils.h"
#include "paddle/fluid/eager/eager_amp_auto_cast.h"
#include "paddle/fluid/eager/eager_layout_auto_tune.h"
#include "paddle/fluid/eager/nan_inf_utils.h"
#include "paddle/fluid/platform/profiler/event_tracing.h"
...
...
@@ -73,6 +74,37 @@ paddle::experimental::Tensor conv2d_dygraph_function(
}
}
+  // Layout autotune
+  if (paddle::imperative::LayoutAutoTune::Instance().UseLayoutAutoTune()) {
+    VLOG(5) << "Check and Prepare For LAYOUT";
+    paddle::small_vector<std::vector<paddle::experimental::Tensor>,
+                         egr::kSlotSmallVectorSize>
+        tensors_vector = {{input}, {filter}};
+    auto op_name = phi::TransToFluidOpName("conv2d");
+    auto transformer = egr::EagerLayoutAutotune<std::string>(
+        op_name, tensors_vector, &data_format);
+    auto NEW_input = transformer->TransInTensor("input", input);
+    paddle::imperative::LayoutAutoTune::Instance().DisableLayoutAutoTune();
+    auto out = conv2d_dygraph_function(NEW_input,
+                                       filter,
+                                       strides,
+                                       paddings,
+                                       paddding_algorithm,
+                                       groups,
+                                       dilations,
+                                       data_format,
+                                       use_addto,
+                                       workspace_size_MB,
+                                       exhaustive_search);
+    transformer->SetOutTensorLayout(&out);
+    paddle::imperative::LayoutAutoTune::Instance().EnableLayoutAutoTune();
+    // Returns
+    return out;
+  }

  // Get Input AutoGradMeta
  egr::AutogradMeta* input_autograd_meta =
      egr::EagerUtils::nullable_autograd_meta(input);
...
...
paddle/fluid/eager/auto_code_generator/generator/eager_gen.py
...
...
@@ -185,6 +185,8 @@ FORWARD_FUNCTION_TEMPLATE = \
// Dygraph Record Event
{}
// AMP Logic
{}
// Layout autotune
{}
// Get Input AutoGradMeta
{}
...
...
@@ -217,7 +219,8 @@ FORWARD_ONLY_FUNCTION_TEMPLATE = \
{}
  // AMP Logic
{}
+  // Layout autotune
+{}
  // Forward API Call
  VLOG(3) << \"Final State Running: \" << \"{}\";
{}
...
...
@@ -295,7 +298,6 @@ NODE_CC_FILE_TEMPLATE = \
#include "paddle/fluid/eager/api/generated/eager_generated/forwards/dygraph_functions.h"
#include "paddle/fluid/eager/to_static/run_program_op_node.h"
#include "paddle/fluid/eager/nan_inf_utils.h"
#include "paddle/phi/api/include/sparse_api.h"
#include "paddle/fluid/eager/api/manual/eager_manual/nodes/nodes.h"
DECLARE_bool(check_nan_inf);
...
...
@@ -317,7 +319,7 @@ FORWARD_CC_FILE_TEMPLATE = \
#include "paddle/phi/api/lib/dygraph_api.h"
#include "paddle/fluid/eager/api/generated/eager_generated/forwards/dygraph_functions.h"
#include "paddle/fluid/eager/api/generated/eager_generated/backwards/nodes.h"
#include "paddle/fluid/eager/eager_layout_auto_tune.h"
#include "paddle/phi/api/include/strings_api.h"
#include "paddle/phi/api/include/sparse_api.h"
#include "paddle/fluid/eager/api/utils/global_utils.h"
...
...
@@ -396,7 +398,21 @@ AMP_LOGIC_TEMPLATE = \
}}
}}
"""
LAYOUT_LOGIC_TEMPLATE = \
"""
if (paddle::imperative::LayoutAutoTune::Instance().UseLayoutAutoTune()) {{
VLOG(5) << "Check and Prepare For LAYOUT";
paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> tensors_vector = {};
{}
{}
paddle::imperative::LayoutAutoTune::Instance().DisableLayoutAutoTune();
{}
{}
paddle::imperative::LayoutAutoTune::Instance().EnableLayoutAutoTune();
// Returns
return {};
}}
"""
CREATE_PLAIN_OPTIONAL_TENSOR_TEMPLATE = \
"""
paddle::optional<paddle::experimental::Tensor> {}_optional;
...
...
@@ -992,6 +1008,9 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase):
        amp_tensors_vector_optional_list = []
        amp_autocast_list = []
        amp_autocast_optional_list = []
+       layout_autotune_list = []
+       layout_autotune_optional_list = []
+       layout_tensors_vector_optional_list = []
        for name, (ttype, pos) in forward_inputs_position_map.items():
            inputs_call_list[pos] = f"{name}"
            amp_inputs_call_list[pos] = f"NEW_{name}"
...
...
@@ -1009,6 +1028,12 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase):
                    amp_autocast_optional_list.append(
                        f"auto NEW_{name} = egr::EagerAmpAutoCast(\"{name}\", {name}, amp_dst_dtype, op_name);\n"
                    )
+                   layout_tensors_vector_optional_list.append(
+                       f"if ({name}) tensors_vector.push_back({{ *{name} }});\n"
+                   )
+                   layout_autotune_optional_list.append(
+                       f"auto NEW_{name} = transformer->TransInTensor(\"{name}\", {name});\n"
+                   )
                else:
                    if is_inplaced and forward_inplace_map and name in forward_inplace_map.keys():
...
...
@@ -1023,6 +1048,9 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase):
                        amp_autocast_list.append(
                            f"auto NEW_{name} = egr::EagerAmpAutoCast(\"{name}\", {name}, amp_dst_dtype, op_name);\n"
                        )
+                       layout_autotune_list.append(
+                           f"auto NEW_{name} = transformer->TransInTensor(\"{name}\", {name});\n"
+                       )
            else:
                assert IsVectorTensorType(ttype)
                if is_optional:
...
...
@@ -1037,6 +1065,9 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase):
                    amp_autocast_optional_list.append(
                        f"auto NEW_{name} = egr::EagerAmpAutoCasts(\"{name}\", {name}, amp_dst_dtype, op_name);\n"
                    )
+                   layout_autotune_optional_list.append(
+                       f"auto NEW_{name} = transformer->TransInTensor(\"{name}\", {name});\n"
+                   )
                else:
                    if is_inplaced and forward_inplace_map and name in forward_inplace_map.keys():
...
...
@@ -1047,10 +1078,59 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase):
                        amp_autocast_list.append(
                            f"auto NEW_{name} = egr::EagerAmpAutoCasts(\"{name}\", {name}, amp_dst_dtype, op_name);\n"
                        )
+                       layout_autotune_list.append(
+                           f"auto NEW_{name} = transformer->TransInTensor(\"{name}\", {name});\n"
+                       )
            inputs_args_definition_list[pos] = arg_str
            inputs_args_declaration_list[pos] = arg_str

+       # for layout autotune attr
+       lightly_sensitive_attr = [
+           'axis', 'axes', 'dim', 'dims', 'start', 'end', 'stop'
+       ]
+       heavily_sensitive_attr = ['data_format', 'data_layout']
+       layout_autotune_attr = []
+       layout_autotune_attr_code_list = []
+       layout_autotune_attr_type_list = []
+       layout_autotune_attr_code_list.append(
+           f"auto op_name = phi::TransToFluidOpName(\"{forward_api_name}\");\n"
+       )
+       lightly_flag = False
+       heavily_flag = False
+       for name, atype, default_val, pos in forward_attrs_list:
+           for attr_name in lightly_sensitive_attr:
+               if name.find(attr_name) != -1 and name not in layout_autotune_attr:
+                   lightly_flag = True
+                   layout_autotune_attr.append(name)
+                   layout_autotune_attr_type_list.append(atype)
+           if lightly_flag is False:
+               for attr_name in heavily_sensitive_attr:
+                   if name.find(attr_name) != -1 and name not in layout_autotune_attr:
+                       layout_autotune_attr.append(name)
+                       layout_autotune_attr_type_list.append(atype)
+                       heavily_flag = True
+       if len(layout_autotune_attr) == 0:
+           layout_autotune_attr_code_list.append(
+               f"auto transformer = egr::EagerLayoutAutotune(op_name, tensors_vector);\n"
+           )
+       elif len(layout_autotune_attr) == 1:
+           layout_autotune_attr_code_list.append(
+               f"auto transformer = egr::EagerLayoutAutotune<{layout_autotune_attr_type_list[0]}>(op_name, tensors_vector, &{layout_autotune_attr[0]});\n"
+           )
+       elif len(layout_autotune_attr) == 2:
+           layout_autotune_attr_code_list.append(
+               f"auto transformer = egr::EagerLayoutAutotune<{layout_autotune_attr_type_list[0]}, {layout_autotune_attr_type_list[1]}>(op_name, tensors_vector, &{layout_autotune_attr[0]}, &{layout_autotune_attr[1]});\n"
+           )
+       else:
+           layout_autotune_attr_code_list.append(
+               f"auto transformer = egr::EagerLayoutAutotune(op_name, tensors_vector, {len(layout_autotune_attr)});\n"
+           )

        # forward attrs
        for name, atype, default_val, pos in forward_attrs_list:
            inputs_call_list[pos] = name
            amp_inputs_call_list[pos] = name
...
...
@@ -1236,6 +1316,35 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase):
            amp_tensors_vector_optional_list_str, amp_get_dst_dtype_str,
            amp_autocast_list_str, amp_call_str)

+       # Forward layout autotune
+       layout_inputs_call_args_str = amp_inputs_call_args_str
+       layout_tmp_result_list = []
+       layout_autotune_outs_list = ""
+       if num_outputs == 1:
+           layout_autotune_outs_list += f"{indent}auto {returns_str} = api_result;\n"
+           layout_autotune_outs_list += f"{indent}transformer -> SetOutTensorLayout(&{returns_str});\n"
+       else:
+           for name, (rtype, pos) in forward_outputs_position_map.items():
+               if name in intermediate_outputs:
+                   continue
+               layout_autotune_outs_list += f"{indent}auto& {name} = std::get<{len(layout_tmp_result_list)}>(api_result);\n"
+               layout_autotune_outs_list += f"{indent}transformer -> SetOutTensorLayout(&{name});\n"
+               layout_tmp_result_list.append(f"{name}")
+
+       if returns_type_str == "paddle::experimental::Tensor&" or forward_api_name == "slice" or forward_api_name == "strided_slice" or len(
+               layout_autotune_attr) == 0:
+           layout_logic_str = ""
+       else:
+           # after_call_str = f"return {forward_function_name}({layout_inputs_call_args_str});\n"
+           after_call_str = f"auto api_result = {forward_function_name}({layout_inputs_call_args_str});\n"
+           layout_logic_str = LAYOUT_LOGIC_TEMPLATE.format(
+               amp_tensors_vector_list_str,
+               " ".join(layout_tensors_vector_optional_list),
+               " ".join(layout_autotune_attr_code_list) + " " +
+               " ".join(layout_autotune_list) +
+               " ".join(layout_autotune_optional_list),
+               after_call_str, layout_autotune_outs_list, returns_str)

        # Generate forward_definition_str and forward_declaration_str
        if self.is_forward_only:
            if len(amp_tensors_vector_list) == 0:
...
...
@@ -1243,17 +1352,17 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase):
            self.forward_definition_str += FORWARD_ONLY_FUNCTION_TEMPLATE.format(
                returns_type_str, forward_function_name,
                inputs_args_definition_str, dygraph_event_str, amp_logic_str,
-               forward_function_name, forward_call_str, get_outputs_str,
-               returns_str)
+               layout_logic_str, forward_function_name, forward_call_str,
+               get_outputs_str, returns_str)
        else:
            self.forward_definition_str += FORWARD_FUNCTION_TEMPLATE.format(
                returns_type_str, forward_function_name,
                inputs_args_definition_str, dygraph_event_str, amp_logic_str,
-               inputs_autograd_meta_str, forward_function_name,
-               forward_call_str, check_nan_inf_str, get_outputs_str,
-               outputs_autograd_meta_str, compute_require_grad_args_str,
-               check_inplace_str, bump_inplace_version_str, node_creation_str,
-               returns_str)
+               layout_logic_str, inputs_autograd_meta_str,
+               forward_function_name, forward_call_str, check_nan_inf_str,
+               get_outputs_str, outputs_autograd_meta_str,
+               compute_require_grad_args_str, check_inplace_str,
+               bump_inplace_version_str, node_creation_str, returns_str)

        self.forward_declaration_str += f"{returns_type_str} {forward_function_name}({inputs_args_declaration_str});\n"
...
...
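The generator change above is plain string templating: for each forward API it scans the attributes for lightly sensitive names (axis, dims, ...) or heavily sensitive ones (data_format, data_layout), picks the matching EagerLayoutAutotune overload, and splices everything into LAYOUT_LOGIC_TEMPLATE ahead of the autograd code. A stripped-down sketch of that mechanism, using a hypothetical template and op data rather than the real generator:

# Toy codegen step: choose a transformer call from the op's attributes and
# fill a template. Every name here is illustrative.
LAYOUT_LOGIC = """  if (UseLayoutAutoTune()) {{
    {transformer}
    auto api_result = {call};
  }}
"""

def layout_logic(op_name, attrs, call):
    heavy = [a for a in attrs if a in ("data_format", "data_layout")]
    light = [a for a in attrs if a in ("axis", "axes", "dim", "dims", "start", "end", "stop")]
    if heavy:
        trans = f'auto transformer = egr::EagerLayoutAutotune<std::string>("{op_name}", tensors_vector, &{heavy[0]});'
    elif light:
        trans = f'auto transformer = egr::EagerLayoutAutotune<int>("{op_name}", tensors_vector, &{light[0]});'
    else:
        trans = f'auto transformer = egr::EagerLayoutAutotune("{op_name}", tensors_vector);'
    return LAYOUT_LOGIC.format(transformer=trans, call=call)

print(layout_logic("conv2d", ["strides", "data_format"], "conv2d_dygraph_function(NEW_input, filter)"))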
paddle/fluid/eager/eager_layout_auto_tune.h
0 → 100644
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/eager/api/generated/eager_generated/forwards/dygraph_functions.h"
#include "paddle/fluid/eager/eager_layout_transformer.h"
#include "paddle/fluid/imperative/layout_autotune.h"
#include "paddle/phi/backends/gpu/gpu_info.h"
namespace egr {

// layout_agnostic_ops_
// For agnostic op like add / relu
inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
    const std::string& op_name,
    const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                               kSlotSmallVectorSize>& tensors_vector) {
  VLOG(3) << " Optimze Layout agnostic op: " << op_name;
  std::shared_ptr<EagerLayoutTransformer> transposer = nullptr;
  transposer =
      std::make_shared<EagerLayoutTransformer>(op_name, tensors_vector);
  return transposer;
}

// For lightly op like reduce
template <typename T>
inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
    const std::string& op_name,
    const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                               kSlotSmallVectorSize>& tensors_vector,
    T* attr) {
  std::shared_ptr<EagerLayoutTransformer> transposer = nullptr;
  bool unstart =
      (paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout() ==
       paddle::experimental::DataLayout::UNDEFINED);
  if (unstart) {
    VLOG(3) << "Optimze Layout was not started" << op_name;
    transposer =
        std::make_shared<EagerLayoutTransformer>(op_name, tensors_vector);
    return transposer;
  }
  transposer =
      std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
  return transposer;
}

// For lightly op like argmax
template <typename T1, typename T2>
inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
    const std::string& op_name,
    const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                               kSlotSmallVectorSize>& tensors_vector,
    T1* axis,
    T2* keep_dim) {
  return EagerLayoutAutotune<T1>(op_name, tensors_vector, axis);
}

// heavily string data_format data_layout
template <>
inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
    const std::string& op_name,
    const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                               kSlotSmallVectorSize>& tensors_vector,
    std::string* attr) {
  VLOG(3) << " Optimze Layout heavily op: " << op_name;
  auto transposer =
      std::make_shared<EagerLayoutTransformer>(op_name, tensors_vector);
  if (paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout() ==
      paddle::experimental::DataLayout::UNDEFINED) {
    // Layout autotune only supports model with convolutional layers
    VLOG(3) << "Optimze Layout was not started" << op_name;
    if (op_name != "conv2d") {
      return transposer;
    } else {
#if defined(PADDLE_WITH_CUDA)
      if (paddle::platform::is_gpu_place(tensors_vector[0][0].place()) &&
          !phi::backends::gpu::TensorCoreAvailable()) {
        paddle::imperative::LayoutAutoTune::Instance().DisableLayoutAutoTune();
        return transposer;
      }
#endif
      auto data_type = tensors_vector[0][0].dtype();
      bool is_tune_fp32 =
          (data_type == paddle::experimental::DataType::FLOAT32) &&
          (*attr == "NHWC");
      bool is_tune_fp16 =
          (data_type == paddle::experimental::DataType::FLOAT16) &&
          (*attr == "NCHW");
      if (is_tune_fp32) {
        paddle::imperative::LayoutAutoTune::Instance().SetDesiredLayout(
            paddle::experimental::DataLayout::NCHW);
        paddle::imperative::LayoutAutoTune::Instance().SetDefaultLayout(
            paddle::experimental::DataLayout::NHWC);
      } else if (is_tune_fp16) {
        paddle::imperative::LayoutAutoTune::Instance().SetDesiredLayout(
            paddle::experimental::DataLayout::NHWC);
        paddle::imperative::LayoutAutoTune::Instance().SetDefaultLayout(
            paddle::experimental::DataLayout::NCHW);
      } else {
        paddle::imperative::LayoutAutoTune::Instance().DisableLayoutAutoTune();
        return transposer;
      }
      VLOG(3) << "Tune the layout from " << attr << " to "
              << paddle::framework::DataLayoutToString(
                     paddle::imperative::LayoutAutoTune::Instance()
                         .GetDesiredLayout());
    }
  }

  if (paddle::imperative::LayoutAutoTune::Instance().IsHeavilyLayoutSensitive(
          op_name)) {
    auto heavily_transposer =
        std::make_shared<EagerHeavilyLayoutSensitiveOpTransformer>(op_name,
                                                                   attr);
    return heavily_transposer;
  }

  VLOG(3) << op_name
          << "'s LayoutTransformer is unimplemented. Use default "
             "LayoutTransformer instead.";
  return transposer;
}

// lightly transpose
template <>
inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
    const std::string& op_name,
    const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                               kSlotSmallVectorSize>& tensors_vector,
    std::vector<int>* attr) {
  std::shared_ptr<EagerLayoutTransformer> transposer = nullptr;
  if (paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout() ==
      paddle::experimental::DataLayout::UNDEFINED) {
    VLOG(3) << " Optimze Layout Unstarted : " << op_name;
    transposer =
        std::make_shared<EagerLayoutTransformer>(op_name, tensors_vector);
    return transposer;
  }
  VLOG(3) << " Optimze Layout lightly op: " << op_name;
  if (op_name == "transpose2") {
    auto trans = std::make_shared<EagerTransposeOpTransformer>(op_name);
    if (tensors_vector[0][0].layout() ==
        paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout()) {
      trans->SetAttr(attr,
                     tensors_vector[0][0].layout() ==
                         paddle::experimental::DataLayout::NHWC);
      return trans;
    }
  }
  transposer =
      std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
  return transposer;
}

// lightly int argmax
template <>
inline std::shared_ptr<EagerLayoutTransformer>
EagerLayoutAutotune<paddle::experimental::Scalar, bool>(
    const std::string& op_name,
    const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                               kSlotSmallVectorSize>& tensors_vector,
    paddle::experimental::Scalar* axis,
    bool* keep_dim) {
  std::shared_ptr<EagerLayoutTransformer> transposer = nullptr;
  if (paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout() ==
      paddle::experimental::DataLayout::UNDEFINED) {
    VLOG(3) << " Optimze Layout Unstarted : " << op_name;
    transposer =
        std::make_shared<EagerLayoutTransformer>(op_name, tensors_vector);
    return transposer;
  }
  auto desired_layout =
      paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
  if (op_name == "argmax") {
    std::shared_ptr<EagerArgmaxOpTransformer> argmax_transform = nullptr;
    argmax_transform = std::make_shared<EagerArgmaxOpTransformer>(op_name);
    if ((tensors_vector[0][0].layout() == desired_layout) && (*keep_dim)) {
      argmax_transform->SetAttr(axis,
                                tensors_vector[0][0].layout() ==
                                    paddle::experimental::DataLayout::NHWC);
      return argmax_transform;
    }
  }
  VLOG(3) << " Optimze Layout lightly op: " << op_name;
  transposer =
      std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
  return transposer;
}

// lightly int flatten
template <>
inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune<int, int>(
    const std::string& op_name,
    const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                               kSlotSmallVectorSize>& tensors_vector,
    int* start_axis,
    int* stop_axis) {
  std::shared_ptr<EagerLayoutTransformer> transposer = nullptr;
  if (paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout() ==
      paddle::experimental::DataLayout::UNDEFINED) {
    VLOG(3) << " Optimze Layout Unstarted : " << op_name;
    transposer =
        std::make_shared<EagerLayoutTransformer>(op_name, tensors_vector);
    return transposer;
  }
  bool no_tranpose =
      tensors_vector[0][0].layout() ==
      paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
  bool is_valid = ((*start_axis) == 1 && (*stop_axis) == 3);
  if (op_name == "flatten" || op_name == "flatten_contiguous_range") {
    if (no_tranpose && is_valid) {
      std::shared_ptr<EagerFlattenOpTransformer> flatten_transform = nullptr;
      flatten_transform = std::make_shared<EagerFlattenOpTransformer>(op_name);
      return flatten_transform;
    }
  }
  VLOG(3) << " Optimze Layout lightly op: " << op_name;
  transposer =
      std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
  return transposer;
}

// lightly int Concat
// lightly T can be int vector<int> vector<int64_t> IntArray
template <>  // default int
inline std::shared_ptr<EagerLayoutTransformer>
EagerLayoutAutotune<paddle::experimental::Scalar>(
    const std::string& op_name,
    const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                               kSlotSmallVectorSize>& tensors_vector,
    paddle::experimental::Scalar* axis) {
  auto desired_layout =
      paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
  std::shared_ptr<EagerLayoutTransformer> transposer = nullptr;
  if (desired_layout == paddle::experimental::DataLayout::UNDEFINED) {
    VLOG(3) << " Optimze Layout Unstarted : " << op_name;
    transposer =
        std::make_shared<EagerLayoutTransformer>(op_name, tensors_vector);
    return transposer;
  }
  bool need_transpose = false;
  for (size_t i = 0; i < tensors_vector.size(); i++) {
    for (size_t idx = 0; idx < tensors_vector[0].size(); idx++) {
      if (desired_layout != tensors_vector[i][idx].layout()) {
        need_transpose = true;
      }
    }
  }
  if (need_transpose) {
    VLOG(3) << "Concat need transpose to NCHW " << op_name;
    transposer =
        std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
    return transposer;
  } else {
    VLOG(3) << " Optimze Layout lightly op: " << op_name;
    auto trans = std::make_shared<EagerConcatOpTransformer>(op_name);
    trans->SetAttr(axis, desired_layout);
    return trans;
  }
}

}  // namespace egr
paddle/fluid/eager/eager_layout_transformer.h
0 → 100644
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/eager/api/generated/eager_generated/forwards/dygraph_functions.h"
#include "paddle/fluid/imperative/layout_autotune.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/tensor_utils.h"
namespace egr {

inline paddle::experimental::Tensor EagerTraceTransposeOp(
    const paddle::experimental::DataLayout layout,
    const paddle::experimental::Tensor& in) {
  if (in.shape().size() != 4) {
    VLOG(4) << "Shape is " << in.shape().size() << " can't transpose to"
            << paddle::framework::DataLayoutToString(layout);
    return in;
  }
  std::vector<int> axis;
  if (layout == paddle::experimental::DataLayout::NHWC) {
    axis = {0, 2, 3, 1};
  } else if (layout == paddle::experimental::DataLayout::NCHW) {
    axis = {0, 3, 1, 2};
  } else {
    axis = {0, 1, 2, 3};
  }
  auto out_tensor = transpose_dygraph_function(in, axis);
  VLOG(4) << "AutoTune Transpose from "
          << paddle::framework::DataLayoutToString(in.layout()) << " to "
          << paddle::framework::DataLayoutToString(layout);
  return out_tensor;
}

// agnostic op
class EagerLayoutTransformer {
 public:
  EagerLayoutTransformer() : op_name_("") {}

  explicit EagerLayoutTransformer(
      const std::string& op_name,
      const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                                 kSlotSmallVectorSize>& tensors_vector)
      : op_name_(op_name) {
    final_layout_ = "UNDEFINED";
    auto desired_layout =
        paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
    for (size_t i = 0; i < tensors_vector.size(); i++) {
      for (size_t idx = 0; idx < tensors_vector[0].size(); idx++) {
        if (final_layout_ == "UNDEFINED") {
          final_layout_ = paddle::framework::DataLayoutToString(
              tensors_vector[0][0].layout());
        } else if (tensors_vector[i][idx].layout() == desired_layout) {
          final_layout_ = paddle::framework::DataLayoutToString(desired_layout);
          break;
        }
      }
    }
    VLOG(4) << op_name_ << "final_layout_ is " << final_layout_;
  }

  EagerLayoutTransformer(const EagerLayoutTransformer&) = delete;
  EagerLayoutTransformer& operator=(const EagerLayoutTransformer&) = delete;
  virtual ~EagerLayoutTransformer() {}

  virtual paddle::optional<paddle::experimental::Tensor> TransInTensor(
      const std::string& in_name,
      const paddle::optional<paddle::experimental::Tensor>& in) {
    VLOG(4) << op_name_ << "is is agnostic, final_layout_ is "
            << final_layout_;
    return in;
  }

  virtual paddle::optional<std::vector<paddle::experimental::Tensor>>
  TransInTensor(
      const std::string& in_name,
      const paddle::optional<std::vector<paddle::experimental::Tensor>>& in) {
    return in;
  }

  virtual std::vector<paddle::experimental::Tensor> TransInTensor(
      const std::string& in_name,
      const std::vector<paddle::experimental::Tensor>& in) {
    return in;
  }

  virtual paddle::experimental::Tensor TransInTensor(
      const std::string& in_name, const paddle::experimental::Tensor& in) {
    return in;
  }

  virtual void SetOutTensorLayout(paddle::experimental::Tensor* out_tensor) {
    bool use_default = (final_layout_ == "Undefined(AnyLayout)" ||
                        final_layout_ == ("UNDEFINED"));
    auto layout = paddle::framework::StringToDataLayout(final_layout_);
    if (!use_default) {
      phi::DenseTensorUtils::GetMutableMeta(
          static_cast<phi::DenseTensor*>(out_tensor->impl().get()))
          ->layout = layout;
    }
    VLOG(4) << op_name_ << "is is agnostic, use_default " << use_default;
  }

  virtual void SetOutTensorLayout(
      std::vector<paddle::experimental::Tensor>* out_tensor) {
    bool use_default = (final_layout_ == "Undefined(AnyLayout)" ||
                        final_layout_ == ("UNDEFINED"));
    if (!use_default) {
      for (size_t i = 0; i < out_tensor->size(); i++) {
        phi::DenseTensorUtils::GetMutableMeta(
            static_cast<phi::DenseTensor*>((*out_tensor)[i].impl().get()))
            ->layout =
            paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
      }
    }
    VLOG(4) << op_name_ << "is is agnostic, use_default " << use_default;
  }

 protected:
  std::string op_name_;
  std::string final_layout_;
};

class EagerHeavilyLayoutSensitiveOpTransformer : public EagerLayoutTransformer {
 public:
  explicit EagerHeavilyLayoutSensitiveOpTransformer(const std::string& op_name,
                                                    std::string* layout)
      : op_name_(op_name),
        desired_layout_(
            paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout()) {
    VLOG(3) << "Optimze Layout heavily op: " << op_name;
    final_layout_ = paddle::framework::DataLayoutToString(desired_layout_);
    if ((*layout) != final_layout_) {
      *layout = final_layout_;
    }
  }

  virtual paddle::optional<std::vector<paddle::experimental::Tensor>>
  TransInTensor(
      const std::string& in_name,
      const paddle::optional<std::vector<paddle::experimental::Tensor>>& in) {
    VLOG(4) << op_name_ << "is is heavily";
    return in;
  }

  virtual paddle::optional<paddle::experimental::Tensor> TransInTensor(
      const std::string& in_name,
      const paddle::optional<paddle::experimental::Tensor>& in) {
    VLOG(4) << op_name_ << "is is heavily";
    return in;
  }

  paddle::experimental::Tensor TransInTensor(
      const std::string& in_name, const paddle::experimental::Tensor& in) {
    if (heavily_input_.count(in_name) != 0 && in.layout() != desired_layout_) {
      VLOG(4) << op_name_ << "'s " << in_name << " need transpose from "
              << paddle::framework::DataLayoutToString(in.layout()) << " to "
              << final_layout_;
      auto out_tensor = EagerTraceTransposeOp(desired_layout_, in);
      return out_tensor;
    }
    return in;
  }

  void SetOutTensorLayout(paddle::experimental::Tensor* out_tensor) {
    if (out_tensor->layout() != desired_layout_) {
      VLOG(4) << " Set Out_tensor's layout from "
              << paddle::framework::DataLayoutToString(out_tensor->layout())
              << " to " << final_layout_;
      phi::DenseTensorUtils::GetMutableMeta(
          static_cast<phi::DenseTensor*>(out_tensor->impl().get()))
          ->layout = desired_layout_;
    }
  }

  void SetOutTensorLayout(
      std::vector<paddle::experimental::Tensor*>* out_tensor) {
    for (size_t i = 0; i < out_tensor->size(); i++) {
      SetOutTensorLayout((*out_tensor)[i]);
    }
  }

  void SetOutTensorLayout(
      std::vector<paddle::experimental::Tensor>* out_tensor) {
    for (size_t i = 0; i < out_tensor->size(); i++) {
      if ((*out_tensor)[i].layout() != desired_layout_) {
        VLOG(4) << " Set Out_tensor's layout from "
                << paddle::framework::DataLayoutToString(
                       (*out_tensor)[i].layout())
                << " to " << final_layout_;
        phi::DenseTensorUtils::GetMutableMeta(
            static_cast<phi::DenseTensor*>((*out_tensor)[i].impl().get()))
            ->layout = desired_layout_;
      }
    }
  }

 protected:
  std::string op_name_;
  std::string final_layout_;
  const paddle::experimental::DataLayout desired_layout_;
  std::unordered_set<std::string> heavily_input_{"x", "y", "input"};
};

class EagerLightlyLayoutSensitiveOpTransformer : public EagerLayoutTransformer {
 public:
  EagerLightlyLayoutSensitiveOpTransformer() {}
  explicit EagerLightlyLayoutSensitiveOpTransformer(const std::string& op_name)
      : op_name_(op_name) {
    VLOG(3) << "Optimze Layout lightly " << op_name;
    auto desired_layout =
        paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
    final_layout_ = paddle::framework::DataLayoutToString(desired_layout);
  }

  // transpose from desired to default
  paddle::experimental::Tensor TransInTensor(
      const std::string& in_name, const paddle::experimental::Tensor& in) {
    std::string input_layout =
        paddle::framework::DataLayoutToString(in.layout());
    auto default_layout =
        paddle::imperative::LayoutAutoTune::Instance().GetDefaultLayout();
    if (final_layout_ == input_layout && in.shape().size() == 4) {
      VLOG(4) << op_name_ << "'s " << in_name << " need transpose from "
              << input_layout << " to default_layout";
      auto out_tensor = EagerTraceTransposeOp(
          paddle::experimental::DataLayout::UNDEFINED, in);
      phi::DenseTensorUtils::GetMutableMeta(
          static_cast<phi::DenseTensor*>(out_tensor.impl().get()))
          ->layout = default_layout;
      return out_tensor;
    }
    VLOG(4) << in_name << "'s layout is " << input_layout;
    return in;
  }

  virtual std::vector<paddle::experimental::Tensor> TransInTensor(
      const std::string& in_name,
      const std::vector<paddle::experimental::Tensor>& in) {
    std::vector<paddle::experimental::Tensor> result;
    auto desired_layout =
        paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
    auto default_layout =
        paddle::imperative::LayoutAutoTune::Instance().GetDefaultLayout();
    for (size_t i = 0; i < in.size(); i++) {
      auto in_tensor = in[i];
      if (in_tensor.layout() == desired_layout) {
        VLOG(4) << op_name_ << "'s " << in_name << " need transpose from "
                << final_layout_ << " to default_layout";
        auto out_tensor = EagerTraceTransposeOp(
            paddle::experimental::DataLayout::UNDEFINED, in_tensor);
        phi::DenseTensorUtils::GetMutableMeta(
            static_cast<phi::DenseTensor*>(out_tensor.impl().get()))
            ->layout = default_layout;
        result.emplace_back(out_tensor);
      } else {
        result.emplace_back(in_tensor);
      }
    }
    return result;
  }

  void SetOutTensorLayout(paddle::experimental::Tensor* out_tensor) {
    auto out_layout = out_tensor->layout();
    auto default_layout =
        paddle::imperative::LayoutAutoTune::Instance().GetDefaultLayout();
    if (out_layout != default_layout) {
      VLOG(4) << op_name_ << "'s out need transpose to default_layout";
      phi::DenseTensorUtils::GetMutableMeta(
          static_cast<phi::DenseTensor*>(out_tensor->impl().get()))
          ->layout = default_layout;
    }
  }

  void SetOutTensorLayout(
      std::vector<paddle::experimental::Tensor*>* out_tensor) {
    for (size_t i = 0; i < out_tensor->size(); i++) {
      VLOG(4) << "out layout is"
              << paddle::framework::DataLayoutToString(
                     (*out_tensor)[i]->layout());
      SetOutTensorLayout((*out_tensor)[i]);
    }
  }

  void SetOutTensorLayout(
      std::vector<paddle::experimental::Tensor>* out_tensor) {
    auto default_layout =
        paddle::imperative::LayoutAutoTune::Instance().GetDefaultLayout();
    for (size_t i = 0; i < out_tensor->size(); i++) {
      VLOG(4) << " out_tensor layout trans to default ";
      phi::DenseTensorUtils::GetMutableMeta(
          static_cast<phi::DenseTensor*>((*out_tensor)[i].impl().get()))
          ->layout = default_layout;
    }
  }

 protected:
  std::string op_name_;
  std::string final_layout_;
  std::unordered_set<std::string> heavily_input_{"x", "y", "input"};
};

class EagerTransposeOpTransformer
    : public EagerLightlyLayoutSensitiveOpTransformer {
 public:
  EagerTransposeOpTransformer() {}
  explicit EagerTransposeOpTransformer(const std::string& op_name)
      : op_name_(op_name) {
    VLOG(3) << "Optimze Layout TransposeOpTransformer " << op_name;
    auto desired_layout =
        paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
    std::string desired_layout_str =
        paddle::framework::DataLayoutToString(desired_layout);
    final_layout_ = desired_layout_str;
  }

  void SetAttr(std::vector<int>* axis, bool is_nhwc) {
    // input's layout is nhwc and input's layout === desired_layout
    std::vector<int> perm_nchw = {0, 2, 3, 1};
    std::vector<int> perm_nhwc = {0, 3, 1, 2};
    auto perm = is_nhwc ? perm_nhwc : perm_nchw;
    (*axis)[0] = perm[(*axis)[0]];
    (*axis)[1] = perm[(*axis)[1]];
    (*axis)[2] = perm[(*axis)[2]];
    (*axis)[3] = perm[(*axis)[3]];
    VLOG(4) << " EagerTransposeOpTransformer " << op_name_
            << "'s layout is equal to desire: " << is_nhwc;
  }

  paddle::experimental::Tensor TransInTensor(
      const std::string& in_name, const paddle::experimental::Tensor& in) {
    VLOG(4) << "with no transpose: EagerTransposeOpTransformer " << in_name
            << "'s layout is "
            << paddle::framework::DataLayoutToString(in.layout());
    return in;
  }

  void SetOutTensorLayout(paddle::experimental::Tensor* out_tensor) {
    auto desired_layout =
        paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
    if (out_tensor->layout() != desired_layout) {
      VLOG(4) << " Set Out_tensor's layout from "
              << paddle::framework::DataLayoutToString(out_tensor->layout())
              << " to " << final_layout_;
      phi::DenseTensorUtils::GetMutableMeta(
          static_cast<phi::DenseTensor*>(out_tensor->impl().get()))
          ->layout = desired_layout;
    }
  }

 protected:
  std::string op_name_;
  std::string final_layout_;
  std::unordered_set<std::string> heavily_input_{"x", "y", "input"};
};

class EagerArgmaxOpTransformer
    : public EagerLightlyLayoutSensitiveOpTransformer {
 public:
  EagerArgmaxOpTransformer() {}
  explicit EagerArgmaxOpTransformer(const std::string& op_name)
      : op_name_(op_name) {
    VLOG(3) << "Optimze Layout lightly " << op_name;
  }

  void SetAttr(paddle::experimental::Scalar* axis, bool is_nhwc) {
    std::vector<int> perm_nhwc = {0, 3, 1, 2};
    std::vector<int> perm_nchw = {0, 2, 3, 1};
    auto perm = is_nhwc ? perm_nhwc : perm_nchw;
    int axes = axis->to<int>();
    (*axis) = static_cast<paddle::experimental::Scalar>(perm[axes]);
  }

  void SetOutTensorLayout(paddle::experimental::Tensor* out_tensor) {
    VLOG(4) << "EagerArgmaxOpTransformer's out layout is"
            << paddle::framework::DataLayoutToString(out_tensor->layout());
    auto desired_layout =
        paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
    if (desired_layout != out_tensor->layout()) {
      VLOG(4) << "Change layout from "
              << paddle::framework::DataLayoutToString(out_tensor->layout())
              << " to " << final_layout_;
      phi::DenseTensorUtils::GetMutableMeta(
          static_cast<phi::DenseTensor*>(out_tensor->impl().get()))
          ->layout = desired_layout;
    }
  }

 protected:
  std::string op_name_;
  std::string final_layout_;
  std::unordered_set<std::string> heavily_input_{"x", "y", "input"};
};

class EagerFlattenOpTransformer
    : public EagerLightlyLayoutSensitiveOpTransformer {
 public:
  EagerFlattenOpTransformer() {}
  explicit EagerFlattenOpTransformer(const std::string& op_name)
      : op_name_(op_name) {
    VLOG(3) << "Optimze Layout lightly " << op_name;
    auto desired_layout =
        paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
    std::string desired_layout_str =
        paddle::framework::DataLayoutToString(desired_layout);
    final_layout_ = desired_layout_str;
  }

  // transpose from NHWC to NCHW
  paddle::experimental::Tensor TransInTensor(
      const std::string& in_name, const paddle::experimental::Tensor& in) {
    return in;
  }

  void SetOutTensorLayout(paddle::experimental::Tensor* out_tensor) {
    VLOG(4) << "EagerArgmaxOpTransformer's out layout is"
            << paddle::framework::DataLayoutToString(out_tensor->layout());
    auto layout = paddle::framework::StringToDataLayout(final_layout_);
    if (layout != out_tensor->layout()) {
      VLOG(4) << "Change layout from "
              << paddle::framework::DataLayoutToString(out_tensor->layout())
              << " to " << final_layout_;
      phi::DenseTensorUtils::GetMutableMeta(
          static_cast<phi::DenseTensor*>(out_tensor->impl().get()))
          ->layout = layout;
    }
  }

 protected:
  std::string op_name_;
  std::string final_layout_;
  std::unordered_set<std::string> heavily_input_{"x", "y", "input"};
};

class EagerConcatOpTransformer
    : public EagerLightlyLayoutSensitiveOpTransformer {
 public:
  EagerConcatOpTransformer() {}
  explicit EagerConcatOpTransformer(const std::string& op_name)
      : op_name_(op_name) {
    VLOG(3) << "Optimze Layout lightly " << op_name;
    auto desired_layout =
        paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
    std::string desired_layout_str =
        paddle::framework::DataLayoutToString(desired_layout);
    final_layout_ = desired_layout_str;
  }

  void SetAttr(paddle::experimental::Scalar* axis,
               paddle::framework::DataLayout layout) {
    std::vector<int> perm_nhwc = {0, 3, 1, 2};
    std::vector<int> perm_nchw = {0, 2, 3, 1};
    int axes = axis->to<int>();
    auto perm =
        (paddle::framework::DataLayout::NHWC == layout) ? perm_nhwc : perm_nchw;
    (*axis) = static_cast<paddle::experimental::Scalar>(perm[axes]);
  }

  virtual std::vector<paddle::experimental::Tensor> TransInTensor(
      const std::string& in_name,
      const std::vector<paddle::experimental::Tensor>& in) {
    return in;
  }

  void SetOutTensorLayout(paddle::experimental::Tensor* out_tensor) {
    auto layout = paddle::framework::StringToDataLayout(final_layout_);
    if (layout != out_tensor->layout()) {
      VLOG(4) << "Change layout from "
              << paddle::framework::DataLayoutToString(out_tensor->layout())
              << " to " << final_layout_;
      phi::DenseTensorUtils::GetMutableMeta(
          static_cast<phi::DenseTensor*>(out_tensor->impl().get()))
          ->layout = layout;
    }
  }

 protected:
  std::string op_name_;
  std::string final_layout_;
  std::unordered_set<std::string> heavily_input_{"x", "y", "input"};
};

}  // namespace egr
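EagerTraceTransposeOp above converts NCHW to NHWC with the permutation {0, 2, 3, 1} and back with {0, 3, 1, 2}, skipping anything that is not 4-D. A quick stand-alone check of those permutations with plain paddle.transpose, independent of this diff:

# Verify the two axis permutations used by EagerTraceTransposeOp.
import paddle

x = paddle.rand([2, 3, 4, 5])                 # NCHW: N=2, C=3, H=4, W=5
nhwc = paddle.transpose(x, [0, 2, 3, 1])      # NCHW -> NHWC: [2, 4, 5, 3]
back = paddle.transpose(nhwc, [0, 3, 1, 2])   # NHWC -> NCHW: [2, 3, 4, 5]

assert nhwc.shape == [2, 4, 5, 3]
assert bool(paddle.allclose(back, x))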
paddle/fluid/imperative/layout_autotune.cc
...
...
@@ -25,12 +25,7 @@ namespace imperative {
bool LayoutAutoTune::UseLayoutAutoTune() const {
#if defined(PADDLE_WITH_CUDA)
-  if (!phi::backends::gpu::TensorCoreAvailable()) {
-    LayoutAutoTune::Instance().DisableLayoutAutoTune();
-    return false;
-  } else {
-    return use_layout_autotune_;
-  }
+  return use_layout_autotune_;
#else
  return false;
#endif
...
...
@@ -168,6 +163,12 @@ paddle::imperative::NameVarMap<VarType> AutoTuneLayout(
  if (op_type != "conv2d") {
    return ins;
  } else {
+#if defined(PADDLE_WITH_CUDA)
+    if (!phi::backends::gpu::TensorCoreAvailable()) {
+      LayoutAutoTune::Instance().DisableLayoutAutoTune();
+      return ins;
+    }
+#endif
    auto conv_in_type = framework::proto::VarType::FP32;
    auto& in_vars = ins.at("Input")[0];
    if (GetDataType<VarType>(in_vars) == framework::proto::VarType::FP16) {
...
...
@@ -213,6 +214,7 @@ paddle::imperative::NameVarMap<VarType> AutoTuneLayout(
    return transposer->Apply(ins, outs, attrs, tracer);
  }
}

template paddle::imperative::NameVarMap<VarBase> AutoTuneLayout<VarBase>(
    const std::string& op_type,
    const paddle::imperative::NameVarMap<VarBase>& ins,
...
...
paddle/fluid/imperative/layout_autotune.h
...
...
@@ -53,9 +53,13 @@ class LayoutAutoTune {
    return layout_agnostic_ops_.count(op_type) != 0;
  }

-  DataLayout GetDesiredLayout() const { return layout_; }
+  DataLayout GetDesiredLayout() const { return desired_layout_; }

-  void SetDesiredLayout(const DataLayout& layout) { layout_ = layout; }
+  DataLayout GetDefaultLayout() const { return default_layout_; }
+
+  void SetDesiredLayout(const DataLayout& layout) { desired_layout_ = layout; }
+
+  void SetDefaultLayout(const DataLayout& layout) { default_layout_ = layout; }

 private:
  LayoutAutoTune();
...
...
@@ -69,7 +73,9 @@ class LayoutAutoTune {
  std::unordered_set<std::string> lightly_layout_sensitive_ops_{
      "instance_norm", "softmax", "transpose", "transpose2", "reshape2"};

-  DataLayout layout_{DataLayout::UNDEFINED};
+  DataLayout desired_layout_{DataLayout::UNDEFINED};
+
+  DataLayout default_layout_{DataLayout::UNDEFINED};
};

template <typename VarType>
...
...
paddle/fluid/imperative/layout_transformer.h
...
...
@@ -77,6 +77,9 @@ class LayoutTransformer {
    for (auto& var : pair.second) {
      // Once the any input is desired layout, we set in_layout is desired
      // layout.
+      if (in_layout == DataLayout::UNDEFINED) {
+        in_layout = paddle::imperative::GetDataLayout(var);
+      }
      if (var != nullptr && (paddle::imperative::GetDataLayout(var) ==
                             LayoutAutoTune::Instance().GetDesiredLayout())) {
        in_layout = LayoutAutoTune::Instance().GetDesiredLayout();
...
...
@@ -84,7 +87,11 @@ class LayoutTransformer {
        }
      }
    }
-    SetVarsLayout(outs, in_layout);
    VLOG(3) << "Optimze Layout agnostic op: " << type_ << " "
            << paddle::framework::DataLayoutToString(in_layout);
+    if (in_layout != DataLayout::UNDEFINED) {
+      SetVarsLayout(outs, in_layout);
+    }
    return ins;
  }
...
...
paddle/fluid/pybind/eager_properties.cc
...
...
@@ -188,6 +188,25 @@ PyObject* tensor_properties_get_shape(TensorObject* self, void* closure) {
  EAGER_CATCH_AND_THROW_RETURN_NULL
}

+PyObject* tensor_properties_get_layout(TensorObject* self, void* closure) {
+  EAGER_TRY
+  std::string layout = "";
+  if (!self->tensor.defined()) {
+    return ToPyObject(layout);
+  }
+
+  if (egr::IsVariableCompatTensor(self->tensor)) {
+    VLOG(3) << "VariableCompatTensor does not support `layout` method.";
+    return ToPyObject(layout);
+  } else {
+    return ToPyObject(
+        paddle::framework::DataLayoutToString(self->tensor.layout()));
+  }
+
+  return ToPyObject(layout);
+  EAGER_CATCH_AND_THROW_RETURN_NULL
+}
+
PyObject* tensor_properties_get_place(TensorObject* self, void* closure) {
  EAGER_TRY
  return ToPyObject(self->tensor.place());
...
...
@@ -249,6 +268,7 @@ struct PyGetSetDef variable_properties[] = {
     nullptr,
     nullptr},
    {"shape", (getter)tensor_properties_get_shape, nullptr, nullptr, nullptr},
+   {"layout", (getter)tensor_properties_get_layout, nullptr, nullptr, nullptr},
// {"is_leaf", (getter)tensor_properties_get_is_leaf, nullptr,
// nullptr,
// nullptr},
...
...
@@ -271,6 +291,7 @@ struct PyGetSetDef string_tensor_variable_properties[] = {
     nullptr,
     nullptr},
    {"shape", (getter)tensor_properties_get_shape, nullptr, nullptr, nullptr},
+   {"layout", (getter)tensor_properties_get_layout, nullptr, nullptr, nullptr},
    {"place", (getter)tensor_properties_get_place, nullptr, nullptr, nullptr},
    {"_place_str", (getter)tensor_properties_get_place_str,
...
...
paddle/fluid/pybind/imperative.cc
...
...
@@ -2062,6 +2062,15 @@ void BindImperative(py::module *m_ptr) {
                             return std::vector<int>();
                           }
                         })
+        .def_property_readonly(
+            "layout",
+            [](imperative::VarBase &self) {
+              if (self.Var().IsType<framework::LoDTensor>()) {
+                auto layout = self.Var().Get<framework::LoDTensor>().layout();
+                return paddle::framework::DataLayoutToString(layout);
+              }
+              return std::string("");
+            })
        .def_property_readonly("is_leaf",
                               &imperative::VarBase::IsLeaf,
                               R"DOC(
...
...
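Both pybind changes above expose the tensor layout to Python as a read-only string property (the eager Tensor via eager_properties.cc, the legacy VarBase via imperative.cc), which is what lets user code and the test observe the layout autotune decision. A minimal sketch of reading it:

# Read the new read-only `layout` property (sketch; the exact string comes
# from DataLayoutToString, e.g. "NCHW" for a freshly created dense tensor).
import paddle

t = paddle.ones([1, 3, 8, 8])
print(t.layout)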
paddle/phi/api/lib/data_transform.cc
...
...
@@ -52,12 +52,16 @@ inline bool NeedTransformPlace(const paddle::platform::Place& input,
  return ret;
}

-inline bool NeedTransformLayout(const DataLayout& input,
+inline bool NeedTransformLayout(const paddle::platform::Place& place,
+                                const DataLayout& input,
                                 const DataLayout& target,
                                 const TransformFlag& transform_flag) {
  bool ret = transform_flag.need_trans_layout() &&
             (input != DataLayout::ALL_LAYOUT &&
              target != DataLayout::ALL_LAYOUT && input != target);
+  if (platform::is_gpu_place(place)) {
+    return false;
+  }
  return ret;
}
...
...
@@ -73,6 +77,7 @@ inline phi::DenseTensor TransDataLayout(const phi::DenseTensor& tensor,
    PADDLE_THROW(phi::errors::PreconditionNotMet(
        "Unsupported data layout cast from CPU to GPU."));
  }
+  return tensor;
}

template <typename Context>
...
...
@@ -196,8 +201,11 @@ phi::DenseTensor TransformData(phi::DenseTensor* tensor,
  phi::DenseTensor out = *tensor;
  bool trans_layout = false;
  bool trans_dtype = false;
-  if (NeedTransformLayout(
-          tensor->layout(), target_args_def.layout, transform_flag)) {
+  if (NeedTransformLayout(tensor->place(),
+                          tensor->layout(),
+                          target_args_def.layout,
+                          transform_flag)) {
    out = TransDataLayout(out, target_args_def.layout);
    trans_layout = true;
  }
...
...
@@ -232,8 +240,10 @@ std::shared_ptr<phi::DenseTensor> PrepareData(
             dense_tensor.place(), target_args_def.backend, transform_flag) &&
         !NeedTransformDataType(
             dense_tensor.dtype(), target_args_def.dtype, transform_flag) &&
-        !NeedTransformLayout(
-            dense_tensor.layout(), target_args_def.layout, transform_flag))) {
+        !NeedTransformLayout(dense_tensor.place(),
+                             dense_tensor.layout(),
+                             target_args_def.layout,
+                             transform_flag))) {
      return std::static_pointer_cast<phi::DenseTensor>(tensor_in);
    }
    phi::DenseTensor out =
...
...
@@ -267,8 +277,10 @@ std::unique_ptr<std::vector<phi::DenseTensor>> PrepareData(
             tensor_in->place(), target_args_def.backend, transform_flag) &&
         !NeedTransformDataType(
             tensor_in->dtype(), target_args_def.dtype, transform_flag) &&
-        !NeedTransformLayout(
-            tensor_in->layout(), target_args_def.layout, transform_flag))) {
+        !NeedTransformLayout(tensor_in->place(),
+                             tensor_in->layout(),
+                             target_args_def.layout,
+                             transform_flag))) {
      pt_tensors->emplace_back(
          *std::dynamic_pointer_cast<phi::DenseTensor>(tensor_in));
    } else {
...
python/paddle/fluid/tests/unittests/test_layout_autotune.py
...
...
@@ -21,9 +21,6 @@ import numpy
import paddle
import paddle.nn.functional as F
-from paddle.fluid.framework import _enable_legacy_dygraph
-_enable_legacy_dygraph()


class SimpleNet(paddle.nn.Layer):
...
...
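Removing _enable_legacy_dygraph means test_layout_autotune.py now runs under the default eager mode and therefore exercises the generated layout logic above. A simplified, hedged version of such an eager-mode check (the config API and exact expectations are assumptions, not copied from the test):

# Simplified eager-mode check in the spirit of test_layout_autotune.py:
# with autotune on and AMP O2, a conv2d output should come back as NHWC
# on a Tensor Core GPU, and stay NCHW otherwise.
import paddle

paddle.incubate.autotune.set_config({"layout": {"enable": True}})  # assumed API

conv = paddle.nn.Conv2D(3, 8, (3, 3))
data = paddle.rand([1, 3, 16, 14])

with paddle.amp.auto_cast(level="O2"):
    conv = paddle.amp.decorate(models=conv, level="O2")
    out = conv(data)

print(out.shape)   # logical shape is unchanged by the layout choice
print(out.layout)  # "NHWC" when tuning kicked in, "NCHW" otherwise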
python/paddle/nn/functional/conv.py
...
...
@@ -129,6 +129,8 @@ def _conv_nd(x,
    if bias is not None:
        channel_dim = channel_dim + len(
            x.shape) if channel_dim < 0 else channel_dim
+        if pre_bias.layout == "NHWC":
+            channel_dim = 3  # last dim
        if isinstance(x, tuple):
            x = x[0]
        if isinstance(bias, tuple):
...
...