Commit 16439bb9 (unverified)
Authored by niuliling123 on Sep 19, 2022 · Committed via GitHub on Sep 19, 2022

Update layoutautotune for inplace (#45826)

Parent: 46e4fb2a
Showing 14 changed files with 443 additions and 326 deletions (+443 -326)
Changed files:

paddle/fluid/eager/api/manual/eager_manual/forwards/conv2d_fwd_function.cc   +11  -12
paddle/fluid/eager/api/utils/global_utils.h                                  +17  -0
paddle/fluid/eager/auto_code_generator/generator/eager_gen.py                +112 -86
paddle/fluid/eager/eager_layout_auto_tune.h                                  +112 -86
paddle/fluid/eager/eager_layout_transformer.h                                +90  -95
paddle/fluid/imperative/layout_autotune.cc                                   +24  -12
paddle/fluid/imperative/layout_autotune.h                                    +19  -9
paddle/fluid/imperative/layout_transformer.h                                 +18  -2
paddle/fluid/imperative/tracer.cc                                            +2   -0
paddle/fluid/imperative/tracer.h                                             +16  -2
paddle/fluid/pybind/pybind.cc                                                +6   -11
paddle/phi/api/lib/data_transform.cc                                         +8   -8
python/paddle/fluid/tests/unittests/test_layout_autotune.py                  +7   -0
python/paddle/nn/functional/conv.py                                          +1   -3
paddle/fluid/eager/api/manual/eager_manual/forwards/conv2d_fwd_function.cc

@@ -51,17 +51,17 @@ paddle::experimental::Tensor conv2d_ad_func(
     auto amp_dst_dtype = egr::GetAmpDestDtype(op_name, amp_tensors_vector);
-    auto NEW_input =
+    auto new_input =
         egr::EagerAmpAutoCast("input", input, amp_dst_dtype, op_name);
-    auto NEW_filter =
+    auto new_filter =
         egr::EagerAmpAutoCast("filter", filter, amp_dst_dtype, op_name);
     {
       paddle::imperative::AutoCastGuard guard(
           egr::Controller::Instance().GetCurrentTracer(),
           paddle::imperative::AmpLevel::O0);
-      return conv2d_ad_func(NEW_input,
-                            NEW_filter,
+      return conv2d_ad_func(new_input,
+                            new_filter,
                             strides,
                             paddings,
                             paddding_algorithm,

@@ -76,7 +76,7 @@ paddle::experimental::Tensor conv2d_ad_func(
   // Layout autotune
-  if (paddle::imperative::LayoutAutoTune::Instance().UseLayoutAutoTune()) {
+  if (egr::Controller::Instance().UseLayoutAutoTune()) {
     VLOG(5) << "Check and Prepare For LAYOUT";
     paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                          egr::kSlotSmallVectorSize>

@@ -85,11 +85,10 @@ paddle::experimental::Tensor conv2d_ad_func(
     auto op_name = phi::TransToFluidOpName("conv2d");
     auto transformer = egr::EagerLayoutAutotune<std::string>(
         op_name, tensors_vector, &data_format);
-    auto NEW_input = transformer->TransInTensor("input", input);
-    bool is_enable_tune =
-        paddle::imperative::LayoutAutoTune::Instance().UseLayoutAutoTune();
-    paddle::imperative::LayoutAutoTune::Instance().DisableLayoutAutoTune();
-    auto out = conv2d_ad_func(NEW_input,
+    auto new_input = transformer->TransInTensor("input", input);
+    bool need_tune = egr::Controller::Instance().UseLayoutAutoTune();
+    egr::Controller::Instance().DisableLayoutAutoTune();
+    auto out = conv2d_ad_func(new_input,
                               filter,
                               strides,
                               paddings,

@@ -101,8 +100,8 @@ paddle::experimental::Tensor conv2d_ad_func(
                               workspace_size_MB,
                               exhaustive_search);
     transformer->SetOutTensorLayout(&out);
-    if (is_enable_tune) {
-      paddle::imperative::LayoutAutoTune::Instance().EnableLayoutAutoTune();
+    if (need_tune) {
+      egr::Controller::Instance().EnableLayoutAutoTune();
     }
     // Returns
     return out;
paddle/fluid/eager/api/utils/global_utils.h

@@ -55,6 +55,23 @@ class Controller {
   paddle::imperative::AmpLevel GetAMPLevel() const {
     return tracer_->GetAmpLevel();
   }

+  bool UseLayoutAutoTune() {
+    bool use_autotune = false;
+#if defined(PADDLE_WITH_CUDA)
+    auto place = tracer_->ExpectedPlace();
+    bool is_gpu_place = paddle::platform::is_gpu_place(place);
+    if (is_gpu_place) {
+      use_autotune = tracer_->UseLayoutAutoTune();
+    }
+#endif
+    return use_autotune;
+  }
+
+  void DisableLayoutAutoTune() { tracer_->DisableLayoutAutoTune(); }
+
+  void EnableLayoutAutoTune() { tracer_->EnableLayoutAutoTune(); }
+
   bool HasGrad() const { return tracer_->HasGrad(); }
   void SetHasGrad(bool has_grad) { tracer_->SetHasGrad(has_grad); }
   std::string GenerateUniqueName(std::string key = "eager_in_tmp") {
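The new Controller helpers above simply forward to the active tracer, so eager code can gate layout autotune without reaching into paddle::imperative::LayoutAutoTune directly. A minimal sketch of the save/disable/restore pattern the hand-written conv2d forward relies on; run_nested_forward is a hypothetical placeholder, not part of this commit:

    // Sketch: pause layout autotune around a nested call, then restore it.
    bool need_tune = egr::Controller::Instance().UseLayoutAutoTune();
    egr::Controller::Instance().DisableLayoutAutoTune();
    auto out = run_nested_forward(input);  // hypothetical nested forward call
    if (need_tune) {
      egr::Controller::Instance().EnableLayoutAutoTune();
    }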
paddle/fluid/eager/auto_code_generator/generator/eager_gen.py

@@ -437,15 +437,14 @@ AMP_LOGIC_TEMPLATE = \
 """

 LAYOUT_LOGIC_TEMPLATE = \
 """
-  if (paddle::imperative::LayoutAutoTune::Instance().UseLayoutAutoTune()) {{
-    VLOG(5) << "Check and Prepare For LAYOUT";
+  if (egr::Controller::Instance().UseLayoutAutoTune()) {{
     paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> tensors_vector = {};
     {}
     {}
-    paddle::imperative::LayoutAutoTune::Instance().DisableLayoutAutoTune();
+    VLOG(5) << "Check and Prepare For LAYOUT "<< op_name;
+    paddle::imperative::LayoutAutotuneGuard guard(egr::Controller::Instance().GetCurrentTracer(), false);
     {}
     {}
-    paddle::imperative::LayoutAutoTune::Instance().EnableLayoutAutoTune();
     // Returns
     return {};
  }}

@@ -922,6 +921,7 @@ class DygraphFunctionGeneratorBase(FunctionGeneratorBase):
             set_grad_in_meta = f"{indent}grad_node->SetGradInMeta({name}, {pos});"
             set_retain_grad = f"{indent}egr::EagerUtils::CheckAndRetainGrad({name});"
             set_out_rank_list.append(set_out_rank)
             set_history_list.append(set_history)
             set_grad_in_meta_list.append(set_grad_in_meta)

@@ -1014,6 +1014,98 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase):
         self.forward_definition_str = ""
         self.forward_declaration_str = ""

+    def GenerateForwardLayoutAutotune(self, forward_api_name,
+                                      amp_tensors_vector_list,
+                                      layout_tensors_vector_optional_list,
+                                      layout_autotune_list_str,
+                                      returns_type_str, returns_str,
+                                      amp_inputs_call_args_str):
+        intermediate_outputs = self.intermediate_outputs
+        forward_attrs_list = self.forward_attrs_list
+        forward_outputs_position_map = self.forward_outputs_position_map
+        num_outputs = len(forward_outputs_position_map.keys()) - len(intermediate_outputs)
+        # for layout autotune attr
+        lightly_sensitive_attr = ['axis', 'axes', 'dim', 'dims', 'start', 'end', 'stop']
+        heavily_sensitive_attr = ['data_format', 'data_layout']
+        layout_autotune_attr = []
+        layout_autotune_attr_code_list = []
+        layout_autotune_attr_type_list = []
+        layout_autotune_attr_code_list.append(
+            f"auto op_name = phi::TransToFluidOpName(\"{forward_api_name}\");\n")
+        lightly_flag = False
+        heavily_flag = False
+        for name, atype, default_val, pos in forward_attrs_list:
+            for attr_name in lightly_sensitive_attr:
+                if name.find(attr_name) != -1 and (name not in layout_autotune_attr):
+                    lightly_flag = True
+                    layout_autotune_attr.append(name)
+                    layout_autotune_attr_type_list.append(atype)
+            if lightly_flag is False:
+                for attr_name in heavily_sensitive_attr:
+                    if name.find(attr_name) != -1 and (name not in layout_autotune_attr):
+                        layout_autotune_attr.append(name)
+                        layout_autotune_attr_type_list.append(atype)
+                        heavily_flag = True
+        if len(layout_autotune_attr) == 0:
+            layout_autotune_attr_code_list.append(
+                f"auto transformer = egr::EagerLayoutAutotune(op_name, tensors_vector);\n")
+        elif len(layout_autotune_attr) == 1:
+            layout_autotune_attr_code_list.append(
+                f"auto transformer = egr::EagerLayoutAutotune<{layout_autotune_attr_type_list[0]}>(op_name, tensors_vector, &{layout_autotune_attr[0]});\n")
+        elif len(layout_autotune_attr) == 2:
+            layout_autotune_attr_code_list.append(
+                f"auto transformer = egr::EagerLayoutAutotune<{layout_autotune_attr_type_list[0]}, {layout_autotune_attr_type_list[1]}>(op_name, tensors_vector, &{layout_autotune_attr[0]}, &{layout_autotune_attr[1]});\n")
+        else:
+            layout_autotune_attr_code_list.append(
+                f"auto transformer = egr::EagerLayoutAutotune<{layout_autotune_attr_type_list[0]}>(op_name, tensors_vector,&{layout_autotune_attr[0]});\n")
+        # Out tensor
+        layout_inputs_call_args_str = amp_inputs_call_args_str
+        forward_function_name = GetDygraphForwardFunctionName(forward_api_name)
+        layout_tmp_result_list = []
+        layout_autotune_outs_list = []
+        result_name = "api_result"
+        if num_outputs == 1:
+            result_name = returns_str
+            layout_autotune_outs_list.append(
+                f"transformer -> SetOutTensorLayout(&{returns_str});\n")
+        else:
+            for name, (rtype, pos) in forward_outputs_position_map.items():
+                if name in intermediate_outputs:
+                    continue
+                layout_autotune_outs_list.append(
+                    f"    auto& {name} = std::get<{len(layout_tmp_result_list)}>(api_result);\n")
+                layout_autotune_outs_list.append(
+                    f"    transformer -> SetOutTensorLayout(&{name});\n")
+                layout_tmp_result_list.append(f"{name}")
+
+        tensors_vector_list_str = "{ " + ",".join(amp_tensors_vector_list) + " }"
+        if len(amp_tensors_vector_list) == 0:
+            layout_logic_str = ""
+        else:
+            after_call_str = f"{returns_type_str} {result_name} = {forward_function_name}({layout_inputs_call_args_str});\n"
+            layout_logic_str = LAYOUT_LOGIC_TEMPLATE.format(
+                tensors_vector_list_str,
+                "    ".join(layout_tensors_vector_optional_list),
+                "    ".join(layout_autotune_attr_code_list) + "    " + layout_autotune_list_str,
+                after_call_str,
+                "    ".join(layout_autotune_outs_list),
+                returns_str)
+
+        return layout_logic_str
+
     def GenerateForwardDefinitionAndDeclaration(self, is_inplaced):
         namespace = self.namespace
         if self.forward_api_name[-1] == '_' and not is_inplaced:

@@ -1049,7 +1141,7 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase):
         layout_tensors_vector_optional_list = []
         for name, (ttype, pos) in forward_inputs_position_map.items():
             inputs_call_list[pos] = f"{name}"
-            amp_inputs_call_list[pos] = f"NEW_{name}"
+            amp_inputs_call_list[pos] = f"new_{name}"
             is_optional = (name in optional_inputs)
             if IsPlainTensorType(ttype):
                 if is_optional:

@@ -1062,13 +1154,13 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase):
                         f"if ({name}) amp_tensors_vector.push_back({{ *{name} }});\n")
                     amp_autocast_optional_list.append(
-                        f"auto NEW_{name} = egr::EagerAmpAutoCast(\"{name}\", {name}, amp_dst_dtype, op_name);\n"
+                        f"auto new_{name} = egr::EagerAmpAutoCast(\"{name}\", {name}, amp_dst_dtype, op_name);\n"
                     )
                     layout_tensors_vector_optional_list.append(
                         f"if ({name}) tensors_vector.push_back({{ *{name} }});\n")
                     layout_autotune_optional_list.append(
-                        f"auto NEW_{name} = transformer->TransInTensor(\"{name}\", {name});\n"
+                        f"auto new_{name} = transformer->TransInTensor(\"{name}\", {name});\n"
                     )
                 else:
                     if is_inplaced and forward_inplace_map and name in forward_inplace_map.keys(

@@ -1076,16 +1168,16 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase):
                         arg_str = f"paddle::experimental::Tensor& {name}"
                         amp_tensors_vector_list.append(f"{{ {name} }}")
                         amp_autocast_list.append(
-                            f"auto NEW_{name} = egr::EagerAmpAutoCast(\"{name}\", {name}, amp_dst_dtype, op_name);\n"
+                            f"auto new_{name} = egr::EagerAmpAutoCast(\"{name}\", {name}, amp_dst_dtype, op_name);\n"
                         )
                     else:
                         arg_str = f"const paddle::experimental::Tensor& {name}"
                         amp_tensors_vector_list.append(f"{{ {name} }}")
                         amp_autocast_list.append(
-                            f"auto NEW_{name} = egr::EagerAmpAutoCast(\"{name}\", {name}, amp_dst_dtype, op_name);\n"
+                            f"auto new_{name} = egr::EagerAmpAutoCast(\"{name}\", {name}, amp_dst_dtype, op_name);\n"
                         )
                     layout_autotune_list.append(
-                        f"auto NEW_{name} = transformer->TransInTensor(\"{name}\", {name});\n"
+                        f"auto new_{name} = transformer->TransInTensor(\"{name}\", {name});\n"
                     )
             else:
                 assert IsVectorTensorType(ttype)

@@ -1099,10 +1191,10 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase):
                         f"if ({name}) amp_tensors_vector.push_back( *{name} );\n")
                     amp_autocast_optional_list.append(
-                        f"auto NEW_{name} = egr::EagerAmpAutoCasts(\"{name}\", {name}, amp_dst_dtype, op_name);\n"
+                        f"auto new_{name} = egr::EagerAmpAutoCasts(\"{name}\", {name}, amp_dst_dtype, op_name);\n"
                     )
                     layout_autotune_optional_list.append(
-                        f"auto NEW_{name} = transformer->TransInTensor(\"{name}\", {name});\n"
+                        f"auto new_{name} = transformer->TransInTensors(\"{name}\", {name});\n"
                     )
                 else:
                     if is_inplaced and forward_inplace_map and name in forward_inplace_map.keys(

@@ -1112,60 +1204,15 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase):
                     arg_str = f"const std::vector<paddle::experimental::Tensor>& {name}"
                     amp_tensors_vector_list.append(f"{name}")
                     amp_autocast_list.append(
-                        f"auto NEW_{name} = egr::EagerAmpAutoCasts(\"{name}\", {name}, amp_dst_dtype, op_name);\n"
+                        f"auto new_{name} = egr::EagerAmpAutoCasts(\"{name}\", {name}, amp_dst_dtype, op_name);\n"
                     )
                     layout_autotune_list.append(
-                        f"auto NEW_{name} = transformer->TransInTensor(\"{name}\", {name});\n"
+                        f"auto new_{name} = transformer->TransInTensors(\"{name}\", {name});\n"
                    )

             inputs_args_definition_list[pos] = arg_str
             inputs_args_declaration_list[pos] = arg_str

-        # for layout autotune attr
-        lightly_sensitive_attr = [
-            'axis', 'axes', 'dim', 'dims', 'start', 'end', 'stop'
-        ]
-        heavily_sensitive_attr = ['data_format', 'data_layout']
-        layout_autotune_attr = []
-        layout_autotune_attr_code_list = []
-        layout_autotune_attr_type_list = []
-        layout_autotune_attr_code_list.append(
-            f"auto op_name = phi::TransToFluidOpName(\"{forward_api_name}\");\n")
-        lightly_flag = False
-        heavily_flag = False
-        for name, atype, default_val, pos in forward_attrs_list:
-            for attr_name in lightly_sensitive_attr:
-                if name.find(attr_name) != -1 and name not in layout_autotune_attr:
-                    lightly_flag = True
-                    layout_autotune_attr.append(name)
-                    layout_autotune_attr_type_list.append(atype)
-            if lightly_flag is False:
-                for attr_name in heavily_sensitive_attr:
-                    if name.find(attr_name) != -1 and name not in layout_autotune_attr:
-                        layout_autotune_attr.append(name)
-                        layout_autotune_attr_type_list.append(atype)
-                        heavily_flag = True
-        if len(layout_autotune_attr) == 0:
-            layout_autotune_attr_code_list.append(
-                f"auto transformer = egr::EagerLayoutAutotune(op_name, tensors_vector);\n")
-        elif len(layout_autotune_attr) == 1:
-            layout_autotune_attr_code_list.append(
-                f"auto transformer = egr::EagerLayoutAutotune<{layout_autotune_attr_type_list[0]}>(op_name, tensors_vector, &{layout_autotune_attr[0]});\n")
-        elif len(layout_autotune_attr) == 2:
-            layout_autotune_attr_code_list.append(
-                f"auto transformer = egr::EagerLayoutAutotune<{layout_autotune_attr_type_list[0]}, {layout_autotune_attr_type_list[1]}>(op_name, tensors_vector, &{layout_autotune_attr[0]}, &{layout_autotune_attr[1]});\n")
-        else:
-            layout_autotune_attr_code_list.append(
-                f"auto transformer = egr::EagerLayoutAutotune(op_name, tensors_vector, {len(layout_autotune_attr)});\n")
-
         # forward attrs
         for name, atype, default_val, pos in forward_attrs_list:
             inputs_call_list[pos] = name

@@ -1356,33 +1403,12 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase):
             amp_autocast_list_str, amp_call_str)

         # Forward layout autotune
-        layout_inputs_call_args_str = amp_inputs_call_args_str
-        layout_tmp_result_list = []
-        layout_autotune_outs_list = ""
-        if num_outputs == 1:
-            layout_autotune_outs_list += f"{indent}auto {returns_str} = api_result;\n"
-            layout_autotune_outs_list += f"{indent}transformer -> SetOutTensorLayout(&{returns_str});\n"
-        else:
-            for name, (rtype, pos) in forward_outputs_position_map.items():
-                if name in intermediate_outputs:
-                    continue
-                layout_autotune_outs_list += f"{indent}auto& {name} = std::get<{len(layout_tmp_result_list)}>(api_result);\n"
-                layout_autotune_outs_list += f"{indent}transformer -> SetOutTensorLayout(&{name});\n"
-                layout_tmp_result_list.append(f"{name}")
-
-        if returns_type_str == "paddle::experimental::Tensor&" or forward_api_name == "slice" or forward_api_name == "strided_slice" or len(
-                layout_autotune_attr) == 0:
-            layout_logic_str = ""
-        else:
-            # after_call_str = f"return {forward_ad_function_name}({layout_inputs_call_args_str});\n"
-            after_call_str = f"auto api_result = {forward_ad_function_name}({layout_inputs_call_args_str});\n"
-            layout_logic_str = LAYOUT_LOGIC_TEMPLATE.format(
-                amp_tensors_vector_list_str,
-                "    ".join(layout_tensors_vector_optional_list),
-                "    ".join(layout_autotune_attr_code_list) + "    " +
-                "    ".join(layout_autotune_list) +
-                "    ".join(layout_autotune_optional_list),
-                after_call_str, layout_autotune_outs_list, returns_str)
+        layout_autotune_list_str = "    ".join(
+            layout_autotune_list) + "    ".join(layout_autotune_optional_list)
+        layout_logic_str = self.GenerateForwardLayoutAutotune(
+            forward_api_name, amp_tensors_vector_list,
+            layout_tensors_vector_optional_list, layout_autotune_list_str,
+            returns_type_str, returns_str, amp_inputs_call_args_str)

         # For inputs outputs prepare for logging
         var_str = f"\n{indent}  std::string input_str = \"\";"
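For orientation, the updated LAYOUT_LOGIC_TEMPLATE scopes autotune with LayoutAutotuneGuard instead of paired Disable/Enable calls. A hand-written sketch of roughly what the template expands to for a single-input, single-output op; relu_ad_func and the tensors_vector contents are illustrative placeholders, not generator output:

    if (egr::Controller::Instance().UseLayoutAutoTune()) {
      paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                           egr::kSlotSmallVectorSize> tensors_vector = {{x}};
      auto op_name = phi::TransToFluidOpName("relu");
      auto transformer = egr::EagerLayoutAutotune(op_name, tensors_vector);
      auto new_x = transformer->TransInTensor("x", x);
      VLOG(5) << "Check and Prepare For LAYOUT " << op_name;
      // Autotune stays off for the nested call and the previous state is
      // restored when the guard leaves scope, even on early return.
      paddle::imperative::LayoutAutotuneGuard guard(
          egr::Controller::Instance().GetCurrentTracer(), false);
      auto out = relu_ad_func(new_x);  // hypothetical generated forward
      transformer->SetOutTensorLayout(&out);
      // Returns
      return out;
    }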
paddle/fluid/eager/eager_layout_auto_tune.h

@@ -19,20 +19,65 @@
 #include "paddle/fluid/imperative/layout_autotune.h"
 #include "paddle/phi/backends/gpu/gpu_info.h"
 namespace egr {
-// layout_agnostic_ops_
-// For agnostic op like add / relu
-inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
+inline bool NeedTransLayout(
+    const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
+                               kSlotSmallVectorSize>& tensors_vector,
+    const paddle::experimental::DataLayout& layout) {
+  for (size_t i = 0; i < tensors_vector.size(); i++) {
+    for (size_t idx = 0; idx < tensors_vector[0].size(); idx++) {
+      if (layout != tensors_vector[i][idx].layout()) {
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
+inline std::shared_ptr<EagerLayoutTransformer> BaseTransformer(
+    const std::string& op_name,
+    const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
+                               kSlotSmallVectorSize>& tensors_vector) {
-  VLOG(3) << " Optimze Layout agnostic op: " << op_name;
   std::shared_ptr<EagerLayoutTransformer> transposer = nullptr;
-  transposer =
-      std::make_shared<EagerLayoutTransformer>(op_name, tensors_vector);
+  bool unstart =
+      (paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout() ==
+       paddle::experimental::DataLayout::UNDEFINED);
+  auto first_layout = tensors_vector[0][0].layout();
+  VLOG(3) << "Layout autotune was is start ? " << (!unstart) << op_name
+          << "'s layout is " << first_layout;
+  transposer = std::make_shared<EagerLayoutTransformer>(
+      op_name, tensors_vector, first_layout);
   return transposer;
 }
+
+// For agnostic op like add, relu, exp
+inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
+    const std::string& op_name,
+    const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
+                               kSlotSmallVectorSize>& tensors_vector) {
+  auto desired_layout =
+      paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
+  auto default_layout =
+      paddle::imperative::LayoutAutoTune::Instance().GetDefaultLayout();
+  auto first_layout = tensors_vector[0][0].layout();
+  if (NeedTransLayout(tensors_vector, first_layout)) {
+    bool need_trans_back = false;
+    for (size_t i = 0; i < tensors_vector.size(); i++) {
+      for (size_t idx = 0; idx < tensors_vector[0].size(); idx++) {
+        if (4 != tensors_vector[i][idx].shape().size()) {
+          need_trans_back = true;
+          VLOG(3) << "Agnostic op " << op_name << " shape is "
+                  << tensors_vector[i][idx].shape().size()
+                  << " and layout is " << tensors_vector[i][idx].layout();
+        }
+      }
+    }
+    auto final_layout = need_trans_back ? default_layout : desired_layout;
+    return std::make_shared<EagerLayoutTransformer>(
+        op_name, tensors_vector, final_layout);
+  }
+  return BaseTransformer(op_name, tensors_vector);
+}

 // For lightly op like reduce
 template <typename T>
 inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(

@@ -40,16 +85,11 @@ inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
     const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                                kSlotSmallVectorSize>& tensors_vector,
     T* attr) {
-  VLOG(3) << "Lightly op " << op_name << "'s shape is "
-          << tensors_vector[0][0].shape().size() << " and layout is "
-          << tensors_vector[0][0].layout();
   std::shared_ptr<EagerLayoutTransformer> transposer = nullptr;
   bool unstart =
       (paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout() ==
        paddle::experimental::DataLayout::UNDEFINED);
   if (unstart) {
     VLOG(3) << "Optimze Layout was not started" << op_name;
     transposer =
         std::make_shared<EagerLayoutTransformer>(op_name, tensors_vector);
     return transposer;
   }
   transposer =
       std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
   return transposer;

@@ -63,33 +103,30 @@ inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
                                kSlotSmallVectorSize>& tensors_vector,
     T1* axis,
     T2* keep_dim) {
-  VLOG(3) << "Lightly op " << op_name << "'s shape is "
-          << tensors_vector[0][0].shape().size() << " and layout is "
-          << tensors_vector[0][0].layout();
   return EagerLayoutAutotune<T1>(op_name, tensors_vector, axis);
 }

-// heavily string data_format data_layout
+// heavily string data_format, data_layout
 template <>
 inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
     const std::string& op_name,
     const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                                kSlotSmallVectorSize>& tensors_vector,
     std::string* attr) {
-  VLOG(3) << " Optimze Layout heavily op: " << op_name;
-  auto transposer =
-      std::make_shared<EagerLayoutTransformer>(op_name, tensors_vector);
+  auto first_layout = tensors_vector[0][0].layout();
+  auto transposer = std::make_shared<EagerLayoutTransformer>(
+      op_name, tensors_vector, first_layout);
   if (paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout() ==
       paddle::experimental::DataLayout::UNDEFINED) {
     // Layout autotune only supports model with convolutional layers
-    VLOG(3) << "Optimze Layout was not started" << op_name;
+    VLOG(3) << "Optimze Layout was not started " << op_name;
     if (op_name != "conv2d") {
       return transposer;
     } else {
 #if defined(PADDLE_WITH_CUDA)
       if (paddle::platform::is_gpu_place(tensors_vector[0][0].place()) &&
           !phi::backends::gpu::TensorCoreAvailable()) {
         paddle::imperative::LayoutAutoTune::Instance().DisableLayoutAutoTune();
         return transposer;
       }
 #endif
       auto data_type = tensors_vector[0][0].dtype();
       bool is_tune_fp32 =
           (data_type == paddle::experimental::DataType::FLOAT32) &&

@@ -97,6 +134,7 @@ inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
       bool is_tune_fp16 =
           (data_type == paddle::experimental::DataType::FLOAT16) &&
           (*attr == "NCHW");
+      VLOG(3) << "Conv2d_dy's dtype " << data_type << " format" << (*attr);
       if (is_tune_fp32) {
         paddle::imperative::LayoutAutoTune::Instance().SetDesiredLayout(
             paddle::experimental::DataLayout::NCHW);

@@ -109,26 +147,27 @@ inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
         paddle::imperative::LayoutAutoTune::Instance().SetDefaultLayout(
             paddle::experimental::DataLayout::NCHW);
       } else {
-        paddle::imperative::LayoutAutoTune::Instance().DisableLayoutAutoTune();
+        egr::Controller::Instance().DisableLayoutAutoTune();
         return transposer;
       }
-      VLOG(3) << "Tune the layout from " << attr << " to "
-              << paddle::framework::DataLayoutToString(
-                     paddle::imperative::LayoutAutoTune::Instance()
-                         .GetDesiredLayout());
+      VLOG(3) << "Tune the layout from " << *attr << " to "
+              << paddle::imperative::LayoutAutoTune::Instance()
+                     .GetDesiredLayout();
     }
   }
   if (paddle::imperative::LayoutAutoTune::Instance().IsHeavilyLayoutSensitive(
           op_name)) {
-    VLOG(3)
-        << op_name
-        << "'s LayoutTransformer is EagerHeavilyLayoutSensitiveOpTransformer";
     auto heavily_transposer =
         std::make_shared<EagerHeavilyLayoutSensitiveOpTransformer>(op_name,
                                                                    attr);
     return heavily_transposer;
   }
-  VLOG(3) << op_name << "'s LayoutTransformer is unimplemented. Use default "
-             "LayoutTransformer instead.";
+  VLOG(3) << op_name << "'s LayoutTransformer is unimplemented. Use default.";
   return transposer;
 }

@@ -139,24 +178,23 @@ inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
     const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                                kSlotSmallVectorSize>& tensors_vector,
     std::vector<int>* attr) {
+  auto first_layout = tensors_vector[0][0].layout();
   std::shared_ptr<EagerLayoutTransformer> transposer = nullptr;
   if (paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout() ==
       paddle::experimental::DataLayout::UNDEFINED) {
-    VLOG(3) << " Optimze Layout Unstarted : " << op_name;
-    transposer =
-        std::make_shared<EagerLayoutTransformer>(op_name, tensors_vector);
+    VLOG(3) << " Optimze Layout was not started " << op_name;
+    transposer = std::make_shared<EagerLayoutTransformer>(
+        op_name, tensors_vector, first_layout);
     return transposer;
   }
   VLOG(3) << " Optimze Layout lightly op: " << op_name;
-  if (op_name == "transpose2") {
+  if (op_name == "transpose2" &&
+      (tensors_vector[0][0].layout() ==
+       paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout())) {
     auto trans = std::make_shared<EagerTransposeOpTransformer>(op_name);
-    if (tensors_vector[0][0].layout() ==
-        paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout()) {
-      trans->SetAttr(attr,
-                     tensors_vector[0][0].layout() ==
-                         paddle::experimental::DataLayout::NHWC);
-      return trans;
-    }
+    trans->SetAttr(attr,
+                   tensors_vector[0][0].layout() ==
+                       paddle::experimental::DataLayout::NHWC);
+    return trans;
   }
   transposer =
       std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);

@@ -172,33 +210,32 @@ EagerLayoutAutotune<paddle::experimental::Scalar, bool>(
                                kSlotSmallVectorSize>& tensors_vector,
     paddle::experimental::Scalar* axis,
     bool* keep_dim) {
+  auto first_layout = tensors_vector[0][0].layout();
   std::shared_ptr<EagerLayoutTransformer> transposer = nullptr;
   if (paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout() ==
       paddle::experimental::DataLayout::UNDEFINED) {
-    VLOG(3) << " Optimze Layout Unstarted : " << op_name;
-    transposer =
-        std::make_shared<EagerLayoutTransformer>(op_name, tensors_vector);
+    VLOG(3) << " Optimze Layout was not started " << op_name;
+    transposer = std::make_shared<EagerLayoutTransformer>(
+        op_name, tensors_vector, first_layout);
     return transposer;
   }
   auto desired_layout =
       paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
-  if (op_name == "argmax") {
+  if (op_name == "argmax" &&
+      (tensors_vector[0][0].layout() == desired_layout) && (*keep_dim)) {
     std::shared_ptr<EagerArgmaxOpTransformer> argmax_transform = nullptr;
     argmax_transform = std::make_shared<EagerArgmaxOpTransformer>(op_name);
-    if ((tensors_vector[0][0].layout() == desired_layout) && (*keep_dim)) {
-      argmax_transform->SetAttr(axis,
-                                tensors_vector[0][0].layout() ==
-                                    paddle::experimental::DataLayout::NHWC);
-      return argmax_transform;
-    }
+    argmax_transform->SetAttr(axis,
+                              tensors_vector[0][0].layout() ==
+                                  paddle::experimental::DataLayout::NHWC);
+    return argmax_transform;
   }
   VLOG(3) << " Optimze Layout lightly op: " << op_name;
   transposer =
       std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
   return transposer;
 }

-// lightly int flatten
+// lightly for flatten
 template <>
 inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune<int, int>(
     const std::string& op_name,

@@ -206,17 +243,17 @@ inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune<int, int>(
                                kSlotSmallVectorSize>& tensors_vector,
     int* start_axis,
     int* stop_axis) {
+  auto first_layout = tensors_vector[0][0].layout();
   std::shared_ptr<EagerLayoutTransformer> transposer = nullptr;
-  if (paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout() ==
-      paddle::experimental::DataLayout::UNDEFINED) {
-    VLOG(3) << " Optimze Layout Unstarted : " << op_name;
-    transposer =
-        std::make_shared<EagerLayoutTransformer>(op_name, tensors_vector);
+  auto desired_layout =
+      paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
+  if (desired_layout == paddle::experimental::DataLayout::UNDEFINED) {
+    VLOG(3) << "Optimze Layout was not started" << op_name;
+    transposer = std::make_shared<EagerLayoutTransformer>(
+        op_name, tensors_vector, first_layout);
     return transposer;
   }
-  bool no_tranpose =
-      tensors_vector[0][0].layout() ==
-      paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
+  bool no_tranpose = tensors_vector[0][0].layout() == desired_layout;
   bool is_valid = ((*start_axis) == 1 && (*stop_axis) == 3);
   if (op_name == "flatten" || op_name == "flatten_contiguous_range") {
     if (no_tranpose && is_valid) {

@@ -226,15 +263,13 @@ inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune<int, int>(
     }
   }
   VLOG(3) << " Optimze Layout lightly op: " << op_name;
   transposer =
       std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
   return transposer;
 }

-// lightly int Concat
-// lightly T can be int vector<int> vector<int64_t> IntArray
+// default int
 template <>
 inline std::shared_ptr<EagerLayoutTransformer>
 EagerLayoutAutotune<paddle::experimental::Scalar>(
     const std::string& op_name,

@@ -243,30 +278,21 @@ EagerLayoutAutotune<paddle::experimental::Scalar>(
     paddle::experimental::Scalar* axis) {
   auto desired_layout =
       paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
+  auto first_layout = tensors_vector[0][0].layout();
   std::shared_ptr<EagerLayoutTransformer> transposer = nullptr;
   if (desired_layout == paddle::experimental::DataLayout::UNDEFINED) {
-    VLOG(3) << " Optimze Layout Unstarted : " << op_name;
-    transposer =
-        std::make_shared<EagerLayoutTransformer>(op_name, tensors_vector);
+    VLOG(3) << " Optimze Layout was not started " << op_name;
+    transposer = std::make_shared<EagerLayoutTransformer>(
+        op_name, tensors_vector, first_layout);
     return transposer;
   }
-  bool need_transpose = false;
-  for (size_t i = 0; i < tensors_vector.size(); i++) {
-    for (size_t idx = 0; idx < tensors_vector[0].size(); idx++) {
-      if (desired_layout != tensors_vector[i][idx].layout()) {
-        need_transpose = true;
-      }
-    }
-  }
-  if (need_transpose) {
-    VLOG(3) << "Concat need transpose to NCHW " << op_name;
+  if (NeedTransLayout(tensors_vector, desired_layout)) {
+    VLOG(3) << op_name << " need transpose to default layout";
     transposer =
         std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
    return transposer;
  } else {
    VLOG(3) << " Optimze Layout lightly op: " << op_name;
    auto trans = std::make_shared<EagerConcatOpTransformer>(op_name);
    trans->SetAttr(axis, desired_layout);
    return trans;
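The EagerLayoutAutotune overloads above are selected by the attribute types the code generator passes in: no attribute for layout-agnostic ops, a std::string* for heavily layout-sensitive ops such as conv2d (data_format), and axis-style attributes for lightly sensitive ops. A rough sketch of the three call shapes, assuming tensors_vector already holds the op's inputs; the literal values are placeholders:

    // Agnostic op (e.g. relu): no attribute.
    auto t1 = egr::EagerLayoutAutotune("relu", tensors_vector);

    // Heavily layout-sensitive op (e.g. conv2d): pass the data_format string,
    // which may also start autotune and set the desired/default layouts.
    std::string data_format = "NCHW";
    auto t2 = egr::EagerLayoutAutotune<std::string>(
        "conv2d", tensors_vector, &data_format);

    // Lightly sensitive op (e.g. argmax): axis plus keep_dim participate.
    paddle::experimental::Scalar axis = 1;
    bool keep_dim = true;
    auto t3 = egr::EagerLayoutAutotune<paddle::experimental::Scalar, bool>(
        "argmax", tensors_vector, &axis, &keep_dim);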
paddle/fluid/eager/eager_layout_transformer.h

@@ -22,9 +22,9 @@ namespace egr {
 inline paddle::experimental::Tensor EagerTraceTransposeOp(
     const paddle::experimental::DataLayout layout,
     const paddle::experimental::Tensor& in) {
   VLOG(4) << "AutoTune Transpose from " << in.layout() << " to " << layout
           << ", tensor's shape is " << in.shape().size();
   if (in.shape().size() != 4) {
     VLOG(4) << "Shape is " << in.shape().size() << " can't transpose to"
             << paddle::framework::DataLayoutToString(layout);
     return in;
   }
   std::vector<int> axis;

@@ -44,77 +44,75 @@ inline paddle::experimental::Tensor EagerTraceTransposeOp(
 // agnostic op
 class EagerLayoutTransformer {
+  using Layout = paddle::experimental::DataLayout;
+
  public:
-  EagerLayoutTransformer() : op_name_("") {}
-  explicit EagerLayoutTransformer(
-      const std::string& op_name,
-      const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
-                                 kSlotSmallVectorSize>& tensors_vector)
-      : op_name_(op_name) {
-    final_layout_ = "UNDEFINED";
-    auto desired_layout =
-        paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
-    for (size_t i = 0; i < tensors_vector.size(); i++) {
-      for (size_t idx = 0; idx < tensors_vector[0].size(); idx++) {
-        if (final_layout_ == "UNDEFINED") {
-          final_layout_ = paddle::framework::DataLayoutToString(
-              tensors_vector[0][0].layout());
-        } else if (tensors_vector[i][idx].layout() == desired_layout) {
-          final_layout_ =
-              paddle::framework::DataLayoutToString(desired_layout);
-          break;
-        }
-      }
-    }
-    VLOG(4) << op_name_ << "final_layout_ is " << final_layout_;
-  }
+  EagerLayoutTransformer() : op_name_(""), final_layout_(Layout::UNDEFINED) {}
+
+  EagerLayoutTransformer(const EagerLayoutTransformer&) = delete;
+  EagerLayoutTransformer& operator=(const EagerLayoutTransformer&) = delete;
+
+  explicit EagerLayoutTransformer(
+      const std::string& op_name,
+      const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
+                                 kSlotSmallVectorSize>& tensors_vector,
+      const Layout final_layout = Layout::UNDEFINED)
+      : op_name_(op_name), final_layout_(final_layout) {
+    VLOG(4) << "Agnostic op : " << op_name_ << " final_layout_ is "
+            << final_layout_;
+  }

   virtual ~EagerLayoutTransformer() {}

-  virtual paddle::optional<paddle::experimental::Tensor> TransInTensor(
-      const std::string& in_name,
-      const paddle::optional<paddle::experimental::Tensor>& in) {
-    VLOG(4) << op_name_ << "is is agnostic, final_layout_ is "
-            << final_layout_;
-    return in;
-  }
+  virtual paddle::experimental::Tensor TransInTensor(
+      const std::string& in_name, const paddle::experimental::Tensor& in) {
+    if (final_layout_ == Layout::UNDEFINED || final_layout_ == in.layout()) {
+      VLOG(4) << "EagerLayoutTransformer with no trans";
+      return in;
+    } else {  // from NCHW to NHWC
+      VLOG(4) << "EagerLayoutTransformer with trans from " << in.layout()
+              << " to " << final_layout_;
+      auto out_tensor = EagerTraceTransposeOp(final_layout_, in);
+      phi::DenseTensorUtils::GetMutableMeta(
+          static_cast<phi::DenseTensor*>(out_tensor.impl().get()))
+          ->layout = final_layout_;
+      return out_tensor;
+    }
+  }

-  virtual paddle::optional<std::vector<paddle::experimental::Tensor>>
-  TransInTensor(
+  virtual paddle::optional<paddle::experimental::Tensor> TransInTensor(
       const std::string& in_name,
-      const paddle::optional<std::vector<paddle::experimental::Tensor>>& in) {
-    return in;
+      const paddle::optional<paddle::experimental::Tensor>& in) {
+    return in ? TransInTensor(in_name, *in) : in;
   }

-  virtual std::vector<paddle::experimental::Tensor> TransInTensor(
+  virtual std::vector<paddle::experimental::Tensor> TransInTensors(
       const std::string& in_name,
       const std::vector<paddle::experimental::Tensor>& in) {
-    VLOG(4) << " TransInTensor";
     return in;
   }

-  virtual paddle::experimental::Tensor TransInTensor(
-      const std::string& in_name, const paddle::experimental::Tensor& in) {
+  virtual paddle::optional<std::vector<paddle::experimental::Tensor>>
+  TransInTensors(
+      const std::string& in_name,
+      const paddle::optional<std::vector<paddle::experimental::Tensor>>& in) {
+    VLOG(4) << " TransInTensor";
+    if (in) {
+      return TransInTensors(in_name, *in);
+    }
     return in;
   }

-  virtual void SetOutTensorLayout(paddle::experimental::Tensor* out_tensor) {
-    bool use_default = (final_layout_ == "Undefined(AnyLayout)" ||
-                        final_layout_ == ("UNDEFINED"));
-    auto layout = paddle::framework::StringToDataLayout(final_layout_);
-    if (!use_default) {
-      phi::DenseTensorUtils::GetMutableMeta(
-          static_cast<phi::DenseTensor*>(out_tensor->impl().get()))
-          ->layout = layout;
-    }
-    VLOG(4) << op_name_ << "is is agnostic, use_default " << use_default;
-  }
+  virtual void SetOutTensorLayout(
+      paddle::optional<paddle::experimental::Tensor>* out_tensor) {
+    VLOG(4) << "optional out_tensor";
+  }

   virtual void SetOutTensorLayout(
       std::vector<paddle::experimental::Tensor>* out_tensor) {
-    bool use_default = (final_layout_ == "Undefined(AnyLayout)" ||
-                        final_layout_ == ("UNDEFINED"));
+    bool use_default = (final_layout_ == Layout::UNDEFINED);
     if (!use_default) {
       for (size_t i = 0; i < out_tensor->size(); i++) {
         phi::DenseTensorUtils::GetMutableMeta(

@@ -126,9 +124,24 @@ class EagerLayoutTransformer {
     VLOG(4) << op_name_ << "is is agnostic, use_default " << use_default;
   }

+  virtual void SetOutTensorLayout(
+      paddle::optional<std::vector<paddle::experimental::Tensor>>* out_tensor) {
+    VLOG(4) << "optional out_tensor";
+  }
+
+  virtual void SetOutTensorLayout(paddle::experimental::Tensor* out_tensor) {
+    bool use_default = final_layout_ == Layout::UNDEFINED;
+    if (!use_default) {
+      phi::DenseTensorUtils::GetMutableMeta(
+          static_cast<phi::DenseTensor*>(out_tensor->impl().get()))
+          ->layout = final_layout_;
+    }
+    VLOG(4) << op_name_ << "is is agnostic, use_default " << use_default;
+  }
+
  protected:
   std::string op_name_;
-  std::string final_layout_;
+  const Layout final_layout_;
 };

 class EagerHeavilyLayoutSensitiveOpTransformer : public EagerLayoutTransformer {

@@ -145,21 +158,6 @@ class EagerHeavilyLayoutSensitiveOpTransformer : public EagerLayoutTransformer {
     }
   }

-  virtual paddle::optional<std::vector<paddle::experimental::Tensor>>
-  TransInTensor(
-      const std::string& in_name,
-      const paddle::optional<std::vector<paddle::experimental::Tensor>>& in) {
-    VLOG(4) << op_name_ << "is is heavily";
-    return in;
-  }
-
-  virtual paddle::optional<paddle::experimental::Tensor> TransInTensor(
-      const std::string& in_name,
-      const paddle::optional<paddle::experimental::Tensor>& in) {
-    VLOG(4) << op_name_ << "is is heavily";
-    return in;
-  }
-
   paddle::experimental::Tensor TransInTensor(
       const std::string& in_name, const paddle::experimental::Tensor& in) {
     if (heavily_input_.count(in_name) != 0 && in.layout() != desired_layout_) {

@@ -230,7 +228,6 @@ class EagerLightlyLayoutSensitiveOpTransformer : public EagerLayoutTransformer {
         paddle::framework::DataLayoutToString(in.layout());
-    auto default_layout =
-        paddle::imperative::LayoutAutoTune::Instance().GetDefaultLayout();
     if (final_layout_ == input_layout && in.shape().size() == 4) {
       VLOG(4) << op_name_ << "'s " << in_name << " need transpose from "
               << input_layout << " to default_layout";

@@ -245,7 +242,7 @@ class EagerLightlyLayoutSensitiveOpTransformer : public EagerLayoutTransformer {
     return in;
   }

-  virtual std::vector<paddle::experimental::Tensor> TransInTensor(
+  virtual std::vector<paddle::experimental::Tensor> TransInTensors(
       const std::string& in_name,
       const std::vector<paddle::experimental::Tensor>& in) {
     std::vector<paddle::experimental::Tensor> result;

@@ -340,22 +337,19 @@ class EagerTransposeOpTransformer
   paddle::experimental::Tensor TransInTensor(
       const std::string& in_name, const paddle::experimental::Tensor& in) {
-    VLOG(4) << "with no transpose: EagerTransposeOpTransformer " << in_name
-            << "'s layout is "
-            << paddle::framework::DataLayoutToString(in.layout());
     return in;
   }

   void SetOutTensorLayout(paddle::experimental::Tensor* out_tensor) {
-    auto desired_layout =
-        paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
-    if (out_tensor->layout() != desired_layout) {
+    auto default_layout =
+        paddle::imperative::LayoutAutoTune::Instance().GetDefaultLayout();
+    if (out_tensor->layout() != default_layout) {
       VLOG(4) << " Set Out_tensor's layout from "
               << paddle::framework::DataLayoutToString(out_tensor->layout())
-              << " to " << final_layout_;
+              << " to " << default_layout;
       phi::DenseTensorUtils::GetMutableMeta(
           static_cast<phi::DenseTensor*>(out_tensor->impl().get()))
-          ->layout = desired_layout;
+          ->layout = default_layout;
     }
   }

@@ -385,15 +379,15 @@ class EagerArgmaxOpTransformer
   void SetOutTensorLayout(paddle::experimental::Tensor* out_tensor) {
     VLOG(4) << "EagerArgmaxOpTransformer's out layout is"
             << paddle::framework::DataLayoutToString(out_tensor->layout());
-    auto desired_layout =
-        paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
-    if (desired_layout != out_tensor->layout()) {
+    auto default_layout =
+        paddle::imperative::LayoutAutoTune::Instance().GetDefaultLayout();
+    if (default_layout != out_tensor->layout()) {
       VLOG(4) << "Change layout from "
               << paddle::framework::DataLayoutToString(out_tensor->layout())
-              << " to " << final_layout_;
+              << " to " << default_layout;
       phi::DenseTensorUtils::GetMutableMeta(
           static_cast<phi::DenseTensor*>(out_tensor->impl().get()))
-          ->layout = desired_layout;
+          ->layout = default_layout;
     }
   }

@@ -410,11 +404,11 @@ class EagerFlattenOpTransformer
   explicit EagerFlattenOpTransformer(const std::string& op_name)
       : op_name_(op_name) {
     VLOG(3) << "Optimze Layout lightly " << op_name;
-    auto desired_layout =
-        paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
-    std::string desired_layout_str =
-        paddle::framework::DataLayoutToString(desired_layout);
-    final_layout_ = desired_layout_str;
+    auto default_layout =
+        paddle::imperative::LayoutAutoTune::Instance().GetDefaultLayout();
+    std::string default_layout_str =
+        paddle::framework::DataLayoutToString(default_layout);
+    final_layout_ = default_layout_str;
   }

   // transpose from NHWC to NCHW

@@ -424,16 +418,17 @@ class EagerFlattenOpTransformer
   }

   void SetOutTensorLayout(paddle::experimental::Tensor* out_tensor) {
-    VLOG(4) << "EagerArgmaxOpTransformer's out layout is"
+    VLOG(4) << "EagerFlattenOpTransformer's out layout is"
             << paddle::framework::DataLayoutToString(out_tensor->layout());
-    auto layout = paddle::framework::StringToDataLayout(final_layout_);
-    if (layout != out_tensor->layout()) {
+    auto desired_layout =
+        paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
+    if (desired_layout != out_tensor->layout()) {
       VLOG(4) << "Change layout from "
               << paddle::framework::DataLayoutToString(out_tensor->layout())
-              << " to " << final_layout_;
+              << " to " << desired_layout;
       phi::DenseTensorUtils::GetMutableMeta(
           static_cast<phi::DenseTensor*>(out_tensor->impl().get()))
-          ->layout = layout;
+          ->layout = desired_layout;
     }
   }

@@ -450,11 +445,11 @@ class EagerConcatOpTransformer
   explicit EagerConcatOpTransformer(const std::string& op_name)
       : op_name_(op_name) {
     VLOG(3) << "Optimze Layout lightly " << op_name;
-    auto desired_layout =
-        paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
-    std::string desired_layout_str =
-        paddle::framework::DataLayoutToString(desired_layout);
-    final_layout_ = desired_layout_str;
+    auto default_layout =
+        paddle::imperative::LayoutAutoTune::Instance().GetDefaultLayout();
+    std::string default_layout_str =
+        paddle::framework::DataLayoutToString(default_layout);
+    final_layout_ = default_layout_str;
   }

   void SetAttr(paddle::experimental::Scalar* axis,

@@ -467,7 +462,7 @@ class EagerConcatOpTransformer
     (*axis) = static_cast<paddle::experimental::Scalar>(perm[axes]);
   }

-  virtual std::vector<paddle::experimental::Tensor> TransInTensor(
+  virtual std::vector<paddle::experimental::Tensor> TransInTensors(
      const std::string& in_name,
      const std::vector<paddle::experimental::Tensor>& in) {
    return in;
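Renaming the std::vector overloads to TransInTensors keeps single-tensor and tensor-list inputs on separate virtual entry points, and the optional variants simply forward to them. An illustrative call-site sketch with placeholder variables:

    auto new_x  = transformer->TransInTensor("x", x);        // paddle::experimental::Tensor
    auto new_xs = transformer->TransInTensors("xs", xs);     // std::vector<Tensor>
    auto new_b  = transformer->TransInTensor("bias", bias);  // paddle::optional<Tensor> forwards to the Tensor overload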
paddle/fluid/imperative/layout_autotune.cc

@@ -14,23 +14,15 @@
 #include "paddle/fluid/imperative/layout_autotune.h"
 #include "paddle/fluid/eager/api/utils/global_utils.h"
 #include "paddle/fluid/framework/op_info.h"
 #include "paddle/fluid/imperative/layout_transformer.h"
 #include "paddle/phi/backends/gpu/gpu_info.h"
 #include "paddle/phi/core/enforce.h"
 #include "paddle/phi/core/errors.h"

 namespace paddle {
 namespace imperative {

-bool LayoutAutoTune::UseLayoutAutoTune() const {
-#if defined(PADDLE_WITH_CUDA)
-  return use_layout_autotune_;
-#else
-  return false;
-#endif
-}
-
 LayoutAutoTune::LayoutAutoTune() {
   const auto& op_info = paddle::framework::OpInfoMap::Instance().map();
   for (auto it = op_info.begin(); it != op_info.end(); it++) {

@@ -140,6 +132,26 @@ paddle::imperative::NameVarMap<VarType> DealLightlyLayoutSensitive(
   return transposer->Apply(ins, outs, attrs, tracer);
 }

+LayoutAutotuneGuard::LayoutAutotuneGuard(std::shared_ptr<Tracer> tracer,
+                                         bool use_autotune)
+    : tracer_(tracer) {
+  pre_layout_autotune_ = tracer_->UseLayoutAutoTune();
+  if (pre_layout_autotune_ != use_autotune) {
+    tracer_->EnableLayoutAutoTune();
+    if (!use_autotune) {
+      tracer_->DisableLayoutAutoTune();
+    }
+  }
+}
+
+LayoutAutotuneGuard::~LayoutAutotuneGuard() {
+  if (pre_layout_autotune_) {
+    tracer_->EnableLayoutAutoTune();
+  } else {
+    tracer_->DisableLayoutAutoTune();
+  }
+}
+
 template <typename VarType>
 paddle::imperative::NameVarMap<VarType> AutoTuneLayout(
     const std::string& op_type,

@@ -147,7 +159,7 @@ paddle::imperative::NameVarMap<VarType> AutoTuneLayout(
     const paddle::imperative::NameVarMap<VarType>& outs,
     paddle::framework::AttributeMap* attrs,
     const std::shared_ptr<imperative::Tracer>& tracer) {
-  if (!LayoutAutoTune::Instance().UseLayoutAutoTune()) {
+  if (!tracer->UseLayoutAutoTune()) {
     return ins;
   }
   // When layout autotuning is enabled, the tuner will check the desired layout.

@@ -165,7 +177,7 @@ paddle::imperative::NameVarMap<VarType> AutoTuneLayout(
   } else {
 #if defined(PADDLE_WITH_CUDA)
     if (!phi::backends::gpu::TensorCoreAvailable()) {
-      LayoutAutoTune::Instance().DisableLayoutAutoTune();
+      tracer->DisableLayoutAutoTune();
       return ins;
     }
 #endif

@@ -185,7 +197,7 @@ paddle::imperative::NameVarMap<VarType> AutoTuneLayout(
     } else if (is_tune_fp16) {
       LayoutAutoTune::Instance().SetDesiredLayout(DataLayout::NHWC);
     } else {
-      LayoutAutoTune::Instance().DisableLayoutAutoTune();
+      tracer->DisableLayoutAutoTune();
      return ins;
    }
    VLOG(3) << "Tune the layout from "
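LayoutAutotuneGuard gives callers exception-safe scoping of the tracer's autotune flag. A minimal usage sketch, assuming the tracer comes from egr::Controller:

    {
      paddle::imperative::LayoutAutotuneGuard guard(
          egr::Controller::Instance().GetCurrentTracer(), /*use_autotune=*/false);
      // ops traced here run with layout autotune disabled
    }  // the tracer's previous UseLayoutAutoTune() state is restored here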
paddle/fluid/imperative/layout_autotune.h

@@ -19,8 +19,8 @@
 #include <unordered_set>
 #include "paddle/fluid/framework/type_defs.h"
+#include "paddle/fluid/imperative/tracer.h"
 #include "paddle/phi/common/layout.h"

 namespace paddle {
 namespace imperative {

@@ -35,12 +35,6 @@ class LayoutAutoTune {
     return layout_autoTune;
   }

-  bool UseLayoutAutoTune() const;
-
-  void EnableLayoutAutoTune() { use_layout_autotune_ = true; }
-
-  void DisableLayoutAutoTune() { use_layout_autotune_ = false; }
-
   bool IsHeavilyLayoutSensitive(const std::string& op_type) const {
     return heavily_layout_sensitive_ops_.count(op_type) != 0;
   }

@@ -64,8 +58,6 @@ class LayoutAutoTune {
  private:
   LayoutAutoTune();

-  bool use_layout_autotune_{false};
-
   std::unordered_set<std::string> layout_agnostic_ops_{};

   std::unordered_set<std::string> heavily_layout_sensitive_ops_{"batch_norm"};

@@ -73,11 +65,29 @@ class LayoutAutoTune {
   std::unordered_set<std::string> lightly_layout_sensitive_ops_{
       "instance_norm", "softmax", "transpose", "transpose2", "reshape2"};

   // Best Layout in this platform
   DataLayout desired_layout_{DataLayout::UNDEFINED};

   // Default Layout in this model
   DataLayout default_layout_{DataLayout::UNDEFINED};
 };

+// LayoutAutotuneGuard is used for RAII.
+class LayoutAutotuneGuard {
+ public:
+  LayoutAutotuneGuard(std::shared_ptr<Tracer> tracer, bool use_autotune);
+
+  ~LayoutAutotuneGuard();
+
+  // forbid copy and operator=
+  LayoutAutotuneGuard(const LayoutAutotuneGuard& guard) = delete;
+  LayoutAutotuneGuard& operator=(const LayoutAutotuneGuard& guard) = delete;
+
+ private:
+  std::shared_ptr<Tracer> tracer_;
+  bool pre_layout_autotune_;
+};
+
 template <typename VarType>
 paddle::imperative::NameVarMap<VarType> AutoTuneLayout(
     const std::string& op_type,
paddle/fluid/imperative/layout_transformer.h

@@ -19,8 +19,24 @@
 #include "paddle/fluid/imperative/var_helper.h"
 #include "paddle/phi/core/enforce.h"
 #include "paddle/phi/core/errors.h"
+#include "paddle/phi/core/tensor_utils.h"

 namespace paddle {
 namespace imperative {

+template <typename VarType>
+void SetOutDataLayout(std::shared_ptr<VarType> var,
+                      const paddle::experimental::DataLayout layout) {
+  if (var != nullptr) {
+    paddle::imperative::SetDataLayout(var, layout);
+    // set out_tensor's layout
+    if (var->MutableVar()->IsInitialized()) {
+      paddle::framework::Variable* tmp_var = var->MutableVar();
+      auto* out = tmp_var->GetMutable<framework::LoDTensor>();
+      phi::DenseTensorUtils::GetMutableMeta(
+          static_cast<framework::LoDTensor*>(out))
+          ->layout = layout;
+    }
+  }
+}
+
 template <typename VarType>
 std::shared_ptr<VarType> TraceTransposeOp(

@@ -118,7 +134,7 @@ class LayoutTransformer {
       auto out_vars = outs.at(name);
       for (auto& var : out_vars) {
         if (var != nullptr) {
-          paddle::imperative::SetDataLayout(var, layout);
+          paddle::imperative::SetOutDataLayout(var, layout);
         }
       }
       not_in_out = false;

@@ -130,7 +146,7 @@ class LayoutTransformer {
     for (auto& pair : outs) {
       for (auto& var : pair.second) {
        if (var != nullptr) {
-          paddle::imperative::SetDataLayout(var, layout);
+          paddle::imperative::SetOutDataLayout(var, layout);
        }
      }
    }
paddle/fluid/imperative/tracer.cc

@@ -42,6 +42,8 @@ thread_local bool Tracer::enable_program_desc_tracing_ = false;
 thread_local bool Tracer::has_grad_ = true;

+thread_local bool Tracer::use_layout_autotune_ = false;
+
 thread_local AmpLevel Tracer::amp_level_ = AmpLevel::O0;

 thread_local phi::DataType Tracer::amp_dtype_ = phi::DataType::FLOAT32;
paddle/fluid/imperative/tracer.h

@@ -28,9 +28,9 @@
 #include "paddle/fluid/imperative/basic_engine.h"
 #include "paddle/fluid/imperative/jit/program_desc_tracer.h"
 #include "paddle/fluid/imperative/layer.h"
-#include "paddle/fluid/imperative/layout_autotune.h"
 #include "paddle/fluid/platform/macros.h"
 #include "paddle/phi/core/compat/arg_map_context.h"

 namespace paddle {
 namespace imperative {

@@ -184,6 +184,20 @@ class Tracer {
     }
   }

+  void DisableLayoutAutoTune() { use_layout_autotune_ = false; }
+
+  void EnableLayoutAutoTune() { use_layout_autotune_ = true; }
+
+  bool UseLayoutAutoTune() {
+#if defined(PADDLE_WITH_CUDA)
+    if (phi::backends::gpu::TensorCoreAvailable()) {
+      return use_layout_autotune_;
+    }
+#endif
+    use_layout_autotune_ = false;
+    return false;
+  }
+
   phi::KernelSignature GetExpectedKernelSignature(const std::string& type,
                                                   const NameTensorMap& ins,

@@ -199,8 +213,8 @@ class Tracer {
   std::unique_ptr<UniqueNameGenerator> generator_;
   platform::Place expected_place_;
   GarbageCollectorMap gcs_;
   static thread_local bool enable_program_desc_tracing_;
+  static thread_local bool use_layout_autotune_;
   static thread_local bool has_grad_;
   static thread_local AmpLevel amp_level_;
   static thread_local phi::DataType amp_dtype_;
paddle/fluid/pybind/pybind.cc

@@ -2503,19 +2503,14 @@ All parameter, weight, gradient are variables in Paddle.
         return res;
       });

-  m.def("enable_layout_autotune", [] {
-    return paddle::imperative::LayoutAutoTune::Instance()
-        .EnableLayoutAutoTune();
-  });
+  m.def("enable_layout_autotune",
+        [] { return egr::Controller::Instance().EnableLayoutAutoTune(); });

-  m.def("disable_layout_autotune", [] {
-    return paddle::imperative::LayoutAutoTune::Instance()
-        .DisableLayoutAutoTune();
-  });
+  m.def("disable_layout_autotune",
+        [] { return egr::Controller::Instance().DisableLayoutAutoTune(); });

-  m.def("use_layout_autotune", [] {
-    return paddle::imperative::LayoutAutoTune::Instance().UseLayoutAutoTune();
-  });
+  m.def("use_layout_autotune",
+        [] { return egr::Controller::Instance().UseLayoutAutoTune(); });

   BindFleetWrapper(&m);
   BindIO(&m);
paddle/phi/api/lib/data_transform.cc

@@ -52,9 +52,9 @@ inline bool NeedTransformPlace(const paddle::platform::Place& input,
   return ret;
 }

-inline bool NeedTransformLayout(const paddle::platform::Place& place,
-                                const DataLayout& input,
+inline bool NeedTransformLayout(const DataLayout& input,
                                 const DataLayout& target,
+                                const paddle::platform::Place& place,
                                 const TransformFlag& transform_flag) {
   bool ret = transform_flag.need_trans_layout() &&
              (input != DataLayout::ALL_LAYOUT &&

@@ -202,9 +202,9 @@ phi::DenseTensor TransformData(phi::DenseTensor* tensor,
   bool trans_layout = false;
   bool trans_dtype = false;

-  if (NeedTransformLayout(tensor->place(),
-                          tensor->layout(),
+  if (NeedTransformLayout(tensor->layout(),
                           target_args_def.layout,
+                          tensor->place(),
                           transform_flag)) {
     out = TransDataLayout(out, target_args_def.layout);
     trans_layout = true;

@@ -240,9 +240,9 @@ std::shared_ptr<phi::DenseTensor> PrepareData(
                            dense_tensor.place(),
                            target_args_def.backend,
                            transform_flag) &&
         !NeedTransformDataType(
             dense_tensor.dtype(), target_args_def.dtype, transform_flag) &&
-        !NeedTransformLayout(dense_tensor.place(),
-                             dense_tensor.layout(),
+        !NeedTransformLayout(dense_tensor.layout(),
                              target_args_def.layout,
+                             dense_tensor.place(),
                              transform_flag))) {
       return std::static_pointer_cast<phi::DenseTensor>(tensor_in);
     }

@@ -277,9 +277,9 @@ std::unique_ptr<std::vector<phi::DenseTensor>> PrepareData(
                              tensor_in->place(),
                              target_args_def.backend,
                              transform_flag) &&
           !NeedTransformDataType(
               tensor_in->dtype(), target_args_def.dtype, transform_flag) &&
-          !NeedTransformLayout(tensor_in->place(),
-                               tensor_in->layout(),
+          !NeedTransformLayout(tensor_in->layout(),
                                target_args_def.layout,
+                               tensor_in->place(),
                                transform_flag))) {
        pt_tensors->emplace_back(
            *std::dynamic_pointer_cast<phi::DenseTensor>(tensor_in));
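The only change in this file is the parameter order of NeedTransformLayout: the source and target layouts now come first and the place last. A sketch of a call under the new signature, with placeholder variables:

    if (NeedTransformLayout(tensor->layout(),        // layout the tensor has
                            target_args_def.layout,  // layout the kernel expects
                            tensor->place(),
                            transform_flag)) {
      out = TransDataLayout(out, target_args_def.layout);
    }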
python/paddle/fluid/tests/unittests/test_layout_autotune.py

@@ -46,6 +46,13 @@ class SimpleNet(paddle.nn.Layer):
 class LayoutAutoTune(unittest.TestCase):

+    def test_config(self):
+        paddle.fluid.core.enable_layout_autotune()
+        if self.use_autoune():
+            self.assertEqual(paddle.fluid.core.use_layout_autotune(), True)
+            paddle.fluid.core.disable_layout_autotune()
+        self.assertEqual(paddle.fluid.core.use_layout_autotune(), False)
+
     def setUp(self):
         self.use_autoune()
python/paddle/nn/functional/conv.py

@@ -130,15 +130,13 @@ def _conv_nd(x,
     if bias is not None:
         channel_dim = channel_dim + len(
             x.shape) if channel_dim < 0 else channel_dim
-        if pre_bias.layout == "NHWC":
-            channel_dim = 3  # last dim
         if isinstance(x, tuple):
             x = x[0]
         if isinstance(bias, tuple):
             bias = bias[0]
         if len(bias.shape) < len(x.shape):
             tmp_bias = _C_ops.reshape(
-                bias, bias.shape +
+                bias, [1 for i in range(channel_dim)] + bias.shape +
                 [1 for i in range(len(x.shape) - channel_dim - 1)])
             return _C_ops.add(pre_bias, tmp_bias)
         else: