Commit 16439bb9 (unverified)
Authored Sep 19, 2022 by niuliling123 · committed via GitHub on Sep 19, 2022

Update layoutautotune for inplace (#45826)

Parent: 46e4fb2a

Showing 14 changed files with 443 additions and 326 deletions (+443 −326)
paddle/fluid/eager/api/manual/eager_manual/forwards/conv2d_fwd_function.cc   +11   -12
paddle/fluid/eager/api/utils/global_utils.h                                  +17    -0
paddle/fluid/eager/auto_code_generator/generator/eager_gen.py               +112   -86
paddle/fluid/eager/eager_layout_auto_tune.h                                 +112   -86
paddle/fluid/eager/eager_layout_transformer.h                                +90   -95
paddle/fluid/imperative/layout_autotune.cc                                   +24   -12
paddle/fluid/imperative/layout_autotune.h                                    +19    -9
paddle/fluid/imperative/layout_transformer.h                                 +18    -2
paddle/fluid/imperative/tracer.cc                                             +2    -0
paddle/fluid/imperative/tracer.h                                             +16    -2
paddle/fluid/pybind/pybind.cc                                                 +6   -11
paddle/phi/api/lib/data_transform.cc                                          +8    -8
python/paddle/fluid/tests/unittests/test_layout_autotune.py                   +7    -0
python/paddle/nn/functional/conv.py                                           +1    -3
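Note: the diffs below all serve one refactor — the layout-autotune on/off switch moves from the paddle::imperative::LayoutAutoTune singleton onto the thread-local imperative Tracer (queried and toggled through egr::Controller), and a new RAII LayoutAutotuneGuard lets generated code disable tuning around the nested call and restore the previous state afterwards, which is what makes the logic safe for inplace ops. Below is a minimal sketch of the control flow the regenerated dygraph forward code follows; it is illustrative only (not part of the diff), and my_op_ad_func together with its single input x are hypothetical placeholders.

    // Sketch of a generated layout-autotune block after this change.
    // The real code is emitted by eager_gen.py from LAYOUT_LOGIC_TEMPLATE.
    if (egr::Controller::Instance().UseLayoutAutoTune()) {
      paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                           egr::kSlotSmallVectorSize> tensors_vector = {{x}};
      auto op_name = phi::TransToFluidOpName("my_op");
      auto transformer = egr::EagerLayoutAutotune(op_name, tensors_vector);
      auto new_x = transformer->TransInTensor("x", x);
      VLOG(5) << "Check and Prepare For LAYOUT " << op_name;
      // Guard: turn tuning off for the nested call, restore on scope exit.
      paddle::imperative::LayoutAutotuneGuard guard(
          egr::Controller::Instance().GetCurrentTracer(), false);
      auto out = my_op_ad_func(new_x);
      transformer->SetOutTensorLayout(&out);
      // Returns
      return out;
    }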
paddle/fluid/eager/api/manual/eager_manual/forwards/conv2d_fwd_function.cc

@@ -51,17 +51,17 @@ paddle::experimental::Tensor conv2d_ad_func(
     auto amp_dst_dtype = egr::GetAmpDestDtype(op_name, amp_tensors_vector);
 
-    auto NEW_input =
-        egr::EagerAmpAutoCast("input", input, amp_dst_dtype, op_name);
-    auto NEW_filter =
-        egr::EagerAmpAutoCast("filter", filter, amp_dst_dtype, op_name);
+    auto new_input =
+        egr::EagerAmpAutoCast("input", input, amp_dst_dtype, op_name);
+    auto new_filter =
+        egr::EagerAmpAutoCast("filter", filter, amp_dst_dtype, op_name);
 
     {
       paddle::imperative::AutoCastGuard guard(
           egr::Controller::Instance().GetCurrentTracer(),
           paddle::imperative::AmpLevel::O0);
-      return conv2d_ad_func(NEW_input,
-                            NEW_filter,
+      return conv2d_ad_func(new_input,
+                            new_filter,
                             strides,
                             paddings,
                             paddding_algorithm,

@@ -76,7 +76,7 @@ paddle::experimental::Tensor conv2d_ad_func(
   // Layout autotune
-  if (paddle::imperative::LayoutAutoTune::Instance().UseLayoutAutoTune()) {
+  if (egr::Controller::Instance().UseLayoutAutoTune()) {
     VLOG(5) << "Check and Prepare For LAYOUT";
     paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                          egr::kSlotSmallVectorSize>

@@ -85,11 +85,10 @@ paddle::experimental::Tensor conv2d_ad_func(
     auto op_name = phi::TransToFluidOpName("conv2d");
     auto transformer = egr::EagerLayoutAutotune<std::string>(
         op_name, tensors_vector, &data_format);
-    auto NEW_input = transformer->TransInTensor("input", input);
-    bool is_enable_tune =
-        paddle::imperative::LayoutAutoTune::Instance().UseLayoutAutoTune();
-    paddle::imperative::LayoutAutoTune::Instance().DisableLayoutAutoTune();
-    auto out = conv2d_ad_func(NEW_input,
+    auto new_input = transformer->TransInTensor("input", input);
+    bool need_tune = egr::Controller::Instance().UseLayoutAutoTune();
+    egr::Controller::Instance().DisableLayoutAutoTune();
+    auto out = conv2d_ad_func(new_input,
                               filter,
                               strides,
                               paddings,

@@ -101,8 +100,8 @@ paddle::experimental::Tensor conv2d_ad_func(
                               workspace_size_MB,
                               exhaustive_search);
     transformer->SetOutTensorLayout(&out);
-    if (is_enable_tune) {
-      paddle::imperative::LayoutAutoTune::Instance().EnableLayoutAutoTune();
+    if (need_tune) {
+      egr::Controller::Instance().EnableLayoutAutoTune();
     }
     // Returns
     return out;
paddle/fluid/eager/api/utils/global_utils.h

@@ -55,6 +55,23 @@ class Controller {
   paddle::imperative::AmpLevel GetAMPLevel() const {
     return tracer_->GetAmpLevel();
   }
+
+  bool UseLayoutAutoTune() {
+    bool use_autotune = false;
+#if defined(PADDLE_WITH_CUDA)
+    auto place = tracer_->ExpectedPlace();
+    bool is_gpu_place = paddle::platform::is_gpu_place(place);
+    if (is_gpu_place) {
+      use_autotune = tracer_->UseLayoutAutoTune();
+    }
+#endif
+    return use_autotune;
+  }
+
+  void DisableLayoutAutoTune() { tracer_->DisableLayoutAutoTune(); }
+
+  void EnableLayoutAutoTune() { tracer_->EnableLayoutAutoTune(); }
+
   bool HasGrad() const { return tracer_->HasGrad(); }
   void SetHasGrad(bool has_grad) { tracer_->SetHasGrad(has_grad); }
   std::string GenerateUniqueName(std::string key = "eager_in_tmp") {
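The Controller methods added above simply delegate to the thread-local Tracer, so manually written forward functions (such as conv2d_ad_func earlier in this diff) can query and toggle autotune without touching the LayoutAutoTune singleton. A condensed caller-side sketch, illustrative only — run_inner_op is a hypothetical stand-in for the nested op call:

    // Remember the current switch, run the inner call with autotune off,
    // then restore the switch; this mirrors the conv2d_ad_func change above.
    bool need_tune = egr::Controller::Instance().UseLayoutAutoTune();
    egr::Controller::Instance().DisableLayoutAutoTune();
    auto out = run_inner_op();  // hypothetical nested call
    if (need_tune) {
      egr::Controller::Instance().EnableLayoutAutoTune();
    }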
paddle/fluid/eager/auto_code_generator/generator/eager_gen.py

@@ -437,15 +437,14 @@ AMP_LOGIC_TEMPLATE = \
 """
 
 LAYOUT_LOGIC_TEMPLATE = \
 """
- if (paddle::imperative::LayoutAutoTune::Instance().UseLayoutAutoTune()) {{
-   VLOG(5) << "Check and Prepare For LAYOUT";
+ if (egr::Controller::Instance().UseLayoutAutoTune()) {{
   paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> tensors_vector = {};
   {}
   {}
-  paddle::imperative::LayoutAutoTune::Instance().DisableLayoutAutoTune();
+  VLOG(5) << "Check and Prepare For LAYOUT "<< op_name;
+  paddle::imperative::LayoutAutotuneGuard guard(egr::Controller::Instance().GetCurrentTracer(), false);
   {}
   {}
-  paddle::imperative::LayoutAutoTune::Instance().EnableLayoutAutoTune();
   // Returns
   return {};
 }}

@@ -922,6 +921,7 @@ class DygraphFunctionGeneratorBase(FunctionGeneratorBase):
             set_grad_in_meta = f"{indent}grad_node->SetGradInMeta({name}, {pos});"
             set_retain_grad = f"{indent}egr::EagerUtils::CheckAndRetainGrad({name});"
             set_out_rank_list.append(set_out_rank)
             set_history_list.append(set_history)
             set_grad_in_meta_list.append(set_grad_in_meta)

@@ -1014,6 +1014,98 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase):
         self.forward_definition_str = ""
         self.forward_declaration_str = ""
 
+    def GenerateForwardLayoutAutotune(self, forward_api_name,
+                                      amp_tensors_vector_list,
+                                      layout_tensors_vector_optional_list,
+                                      layout_autotune_list_str,
+                                      returns_type_str, returns_str,
+                                      amp_inputs_call_args_str):
+        intermediate_outputs = self.intermediate_outputs
+        forward_attrs_list = self.forward_attrs_list
+        forward_outputs_position_map = self.forward_outputs_position_map
+        num_outputs = len(
+            forward_outputs_position_map.keys()) - len(intermediate_outputs)
+        # for layout autotune attr
+        lightly_sensitive_attr = [
+            'axis', 'axes', 'dim', 'dims', 'start', 'end', 'stop'
+        ]
+        heavily_sensitive_attr = ['data_format', 'data_layout']
+        layout_autotune_attr = []
+        layout_autotune_attr_code_list = []
+        layout_autotune_attr_type_list = []
+        layout_autotune_attr_code_list.append(
+            f"auto op_name = phi::TransToFluidOpName(\"{forward_api_name}\");\n")
+
+        lightly_flag = False
+        heavily_flag = False
+        for name, atype, default_val, pos in forward_attrs_list:
+            for attr_name in lightly_sensitive_attr:
+                if name.find(attr_name) != -1 and (name
+                                                   not in layout_autotune_attr):
+                    lightly_flag = True
+                    layout_autotune_attr.append(name)
+                    layout_autotune_attr_type_list.append(atype)
+            if lightly_flag is False:
+                for attr_name in heavily_sensitive_attr:
+                    if name.find(attr_name) != -1 and (
+                            name not in layout_autotune_attr):
+                        layout_autotune_attr.append(name)
+                        layout_autotune_attr_type_list.append(atype)
+                        heavily_flag = True
+        if len(layout_autotune_attr) == 0:
+            layout_autotune_attr_code_list.append(
+                f"auto transformer = egr::EagerLayoutAutotune(op_name, tensors_vector);\n")
+        elif len(layout_autotune_attr) == 1:
+            layout_autotune_attr_code_list.append(
+                f"auto transformer = egr::EagerLayoutAutotune<{layout_autotune_attr_type_list[0]}>(op_name, tensors_vector, &{layout_autotune_attr[0]});\n")
+        elif len(layout_autotune_attr) == 2:
+            layout_autotune_attr_code_list.append(
+                f"auto transformer = egr::EagerLayoutAutotune<{layout_autotune_attr_type_list[0]}, {layout_autotune_attr_type_list[1]}>(op_name, tensors_vector, &{layout_autotune_attr[0]}, &{layout_autotune_attr[1]});\n")
+        else:
+            layout_autotune_attr_code_list.append(
+                f"auto transformer = egr::EagerLayoutAutotune<{layout_autotune_attr_type_list[0]}>(op_name, tensors_vector,&{layout_autotune_attr[0]});\n")
+
+        # Out tensor
+        layout_inputs_call_args_str = amp_inputs_call_args_str
+        forward_function_name = GetDygraphForwardFunctionName(forward_api_name)
+        layout_tmp_result_list = []
+        layout_autotune_outs_list = []
+        result_name = "api_result"
+        if num_outputs == 1:
+            result_name = returns_str
+            layout_autotune_outs_list.append(
+                f"transformer -> SetOutTensorLayout(&{returns_str});\n")
+        else:
+            for name, (rtype, pos) in forward_outputs_position_map.items():
+                if name in intermediate_outputs:
+                    continue
+                layout_autotune_outs_list.append(
+                    f"    auto& {name} = std::get<{len(layout_tmp_result_list)}>(api_result);\n")
+                layout_autotune_outs_list.append(
+                    f"    transformer -> SetOutTensorLayout(&{name});\n")
+                layout_tmp_result_list.append(f"{name}")
+
+        tensors_vector_list_str = "{ " + ",".join(amp_tensors_vector_list) + " }"
+
+        if len(amp_tensors_vector_list) == 0:
+            layout_logic_str = ""
+        else:
+            after_call_str = f"{returns_type_str} {result_name} = {forward_function_name}({layout_inputs_call_args_str});\n"
+            layout_logic_str = LAYOUT_LOGIC_TEMPLATE.format(
+                tensors_vector_list_str,
+                "    ".join(layout_tensors_vector_optional_list),
+                "    ".join(layout_autotune_attr_code_list) + "    " +
+                layout_autotune_list_str, after_call_str,
+                "    ".join(layout_autotune_outs_list), returns_str)
+
+        return layout_logic_str
+
     def GenerateForwardDefinitionAndDeclaration(self, is_inplaced):
         namespace = self.namespace
         if self.forward_api_name[-1] == '_' and not is_inplaced:

@@ -1049,7 +1141,7 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase):
         layout_tensors_vector_optional_list = []
         for name, (ttype, pos) in forward_inputs_position_map.items():
             inputs_call_list[pos] = f"{name}"
-            amp_inputs_call_list[pos] = f"NEW_{name}"
+            amp_inputs_call_list[pos] = f"new_{name}"
             is_optional = (name in optional_inputs)
             if IsPlainTensorType(ttype):
                 if is_optional:

@@ -1062,13 +1154,13 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase):
                         f"if ({name}) amp_tensors_vector.push_back({{ *{name} }});\n"
                     )
                     amp_autocast_optional_list.append(
-                        f"auto NEW_{name} = egr::EagerAmpAutoCast(\"{name}\", {name}, amp_dst_dtype, op_name);\n"
+                        f"auto new_{name} = egr::EagerAmpAutoCast(\"{name}\", {name}, amp_dst_dtype, op_name);\n"
                     )
                     layout_tensors_vector_optional_list.append(
                         f"if ({name}) tensors_vector.push_back({{ *{name} }});\n"
                     )
                     layout_autotune_optional_list.append(
-                        f"auto NEW_{name} = transformer->TransInTensor(\"{name}\", {name});\n"
+                        f"auto new_{name} = transformer->TransInTensor(\"{name}\", {name});\n"
                    )
                 else:
                     if is_inplaced and forward_inplace_map and name in forward_inplace_map.keys(

@@ -1076,16 +1168,16 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase):
                         arg_str = f"paddle::experimental::Tensor& {name}"
                         amp_tensors_vector_list.append(f"{{{name}}}")
                         amp_autocast_list.append(
-                            f"auto NEW_{name} = egr::EagerAmpAutoCast(\"{name}\", {name}, amp_dst_dtype, op_name);\n"
+                            f"auto new_{name} = egr::EagerAmpAutoCast(\"{name}\", {name}, amp_dst_dtype, op_name);\n"
                         )
                     else:
                         arg_str = f"const paddle::experimental::Tensor& {name}"
                         amp_tensors_vector_list.append(f"{{{name}}}")
                         amp_autocast_list.append(
-                            f"auto NEW_{name} = egr::EagerAmpAutoCast(\"{name}\", {name}, amp_dst_dtype, op_name);\n"
+                            f"auto new_{name} = egr::EagerAmpAutoCast(\"{name}\", {name}, amp_dst_dtype, op_name);\n"
                         )
                     layout_autotune_list.append(
-                        f"auto NEW_{name} = transformer->TransInTensor(\"{name}\", {name});\n"
+                        f"auto new_{name} = transformer->TransInTensor(\"{name}\", {name});\n"
                     )
             else:
                 assert IsVectorTensorType(ttype)

@@ -1099,10 +1191,10 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase):
                         f"if ({name}) amp_tensors_vector.push_back( *{name} );\n"
                     )
                     amp_autocast_optional_list.append(
-                        f"auto NEW_{name} = egr::EagerAmpAutoCasts(\"{name}\", {name}, amp_dst_dtype, op_name);\n"
+                        f"auto new_{name} = egr::EagerAmpAutoCasts(\"{name}\", {name}, amp_dst_dtype, op_name);\n"
                     )
                     layout_autotune_optional_list.append(
-                        f"auto NEW_{name} = transformer->TransInTensor(\"{name}\", {name});\n"
+                        f"auto new_{name} = transformer->TransInTensors(\"{name}\", {name});\n"
                     )
                 else:
                     if is_inplaced and forward_inplace_map and name in forward_inplace_map.keys(

@@ -1112,60 +1204,15 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase):
                         arg_str = f"const std::vector<paddle::experimental::Tensor>& {name}"
                         amp_tensors_vector_list.append(f"{name}")
                         amp_autocast_list.append(
-                            f"auto NEW_{name} = egr::EagerAmpAutoCasts(\"{name}\", {name}, amp_dst_dtype, op_name);\n"
+                            f"auto new_{name} = egr::EagerAmpAutoCasts(\"{name}\", {name}, amp_dst_dtype, op_name);\n"
                         )
                     layout_autotune_list.append(
-                        f"auto NEW_{name} = transformer->TransInTensor(\"{name}\", {name});\n"
+                        f"auto new_{name} = transformer->TransInTensors(\"{name}\", {name});\n"
                     )
 
             inputs_args_definition_list[pos] = arg_str
             inputs_args_declaration_list[pos] = arg_str
 
-        # for layout autotune attr
-        lightly_sensitive_attr = [
-            'axis', 'axes', 'dim', 'dims', 'start', 'end', 'stop'
-        ]
-        heavily_sensitive_attr = ['data_format', 'data_layout']
-        layout_autotune_attr = []
-        layout_autotune_attr_code_list = []
-        layout_autotune_attr_type_list = []
-        layout_autotune_attr_code_list.append(
-            f"auto op_name = phi::TransToFluidOpName(\"{forward_api_name}\");\n")
-        lightly_flag = False
-        heavily_flag = False
-        for name, atype, default_val, pos in forward_attrs_list:
-            for attr_name in lightly_sensitive_attr:
-                if name.find(attr_name) != -1 and name not in layout_autotune_attr:
-                    lightly_flag = True
-                    layout_autotune_attr.append(name)
-                    layout_autotune_attr_type_list.append(atype)
-            if lightly_flag is False:
-                for attr_name in heavily_sensitive_attr:
-                    if name.find(attr_name) != -1 and name not in layout_autotune_attr:
-                        layout_autotune_attr.append(name)
-                        layout_autotune_attr_type_list.append(atype)
-                        heavily_flag = True
-        if len(layout_autotune_attr) == 0:
-            layout_autotune_attr_code_list.append(
-                f"auto transformer = egr::EagerLayoutAutotune(op_name, tensors_vector);\n")
-        elif len(layout_autotune_attr) == 1:
-            layout_autotune_attr_code_list.append(
-                f"auto transformer = egr::EagerLayoutAutotune<{layout_autotune_attr_type_list[0]}>(op_name, tensors_vector, &{layout_autotune_attr[0]});\n")
-        elif len(layout_autotune_attr) == 2:
-            layout_autotune_attr_code_list.append(
-                f"auto transformer = egr::EagerLayoutAutotune<{layout_autotune_attr_type_list[0]}, {layout_autotune_attr_type_list[1]}>(op_name, tensors_vector, &{layout_autotune_attr[0]}, &{layout_autotune_attr[1]});\n")
-        else:
-            layout_autotune_attr_code_list.append(
-                f"auto transformer = egr::EagerLayoutAutotune(op_name, tensors_vector, {len(layout_autotune_attr)});\n")
 
         # forward attrs
         for name, atype, default_val, pos in forward_attrs_list:
             inputs_call_list[pos] = name

@@ -1356,33 +1403,12 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase):
             amp_autocast_list_str, amp_call_str)
 
         # Forward layout autotune
-        layout_inputs_call_args_str = amp_inputs_call_args_str
-        layout_tmp_result_list = []
-        layout_autotune_outs_list = ""
-        if num_outputs == 1:
-            layout_autotune_outs_list += f"{indent}auto {returns_str} = api_result;\n"
-            layout_autotune_outs_list += f"{indent}transformer -> SetOutTensorLayout(&{returns_str});\n"
-        else:
-            for name, (rtype, pos) in forward_outputs_position_map.items():
-                if name in intermediate_outputs:
-                    continue
-                layout_autotune_outs_list += f"{indent}auto& {name} = std::get<{len(layout_tmp_result_list)}>(api_result);\n"
-                layout_autotune_outs_list += f"{indent}transformer -> SetOutTensorLayout(&{name});\n"
-                layout_tmp_result_list.append(f"{name}")
-
-        if returns_type_str == "paddle::experimental::Tensor&" or forward_api_name == "slice" or forward_api_name == "strided_slice" or len(
-                layout_autotune_attr) == 0:
-            layout_logic_str = ""
-        else:
-            # after_call_str = f"return {forward_ad_function_name}({layout_inputs_call_args_str});\n"
-            after_call_str = f"auto api_result = {forward_ad_function_name}({layout_inputs_call_args_str});\n"
-            layout_logic_str = LAYOUT_LOGIC_TEMPLATE.format(
-                amp_tensors_vector_list_str,
-                "    ".join(layout_tensors_vector_optional_list),
-                "    ".join(layout_autotune_attr_code_list) + "    " +
-                "    ".join(layout_autotune_list) +
-                "    ".join(layout_autotune_optional_list), after_call_str,
-                layout_autotune_outs_list, returns_str)
+        layout_autotune_list_str = "    ".join(
+            layout_autotune_list) + "    ".join(layout_autotune_optional_list)
+        layout_logic_str = self.GenerateForwardLayoutAutotune(
+            forward_api_name, amp_tensors_vector_list,
+            layout_tensors_vector_optional_list, layout_autotune_list_str,
+            returns_type_str, returns_str, amp_inputs_call_args_str)
 
         # For inputs outputs prepare for logging
         var_str = f"\n{indent}std::string input_str = \"\";"
paddle/fluid/eager/eager_layout_auto_tune.h

@@ -19,20 +19,65 @@
 #include "paddle/fluid/imperative/layout_autotune.h"
 #include "paddle/phi/backends/gpu/gpu_info.h"
 
 namespace egr {
-// layout_agnostic_ops_
-// For agnostic op like add / relu
-inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
+inline bool NeedTransLayout(
     const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                                kSlotSmallVectorSize>& tensors_vector,
+    const paddle::experimental::DataLayout& layout) {
+  for (size_t i = 0; i < tensors_vector.size(); i++) {
+    for (size_t idx = 0; idx < tensors_vector[0].size(); idx++) {
+      if (layout != tensors_vector[i][idx].layout()) {
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
+inline std::shared_ptr<EagerLayoutTransformer> BaseTransformer(
+    const std::string& op_name,
+    const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
+                               kSlotSmallVectorSize>& tensors_vector) {
-  VLOG(3) << " Optimze Layout agnostic op: " << op_name;
   std::shared_ptr<EagerLayoutTransformer> transposer = nullptr;
-  transposer =
-      std::make_shared<EagerLayoutTransformer>(op_name, tensors_vector);
+  bool unstart =
+      (paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout() ==
+       paddle::experimental::DataLayout::UNDEFINED);
+  auto first_layout = tensors_vector[0][0].layout();
+  VLOG(3) << "Layout autotune was is start ? " << (!unstart) << op_name
+          << "'s layout is " << first_layout;
+  transposer = std::make_shared<EagerLayoutTransformer>(
+      op_name, tensors_vector, first_layout);
   return transposer;
 }
+
+// For agnostic op like add, relu, exp
+inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
+    const std::string& op_name,
+    const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
+                               kSlotSmallVectorSize>& tensors_vector) {
+  auto desired_layout =
+      paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
+  auto default_layout =
+      paddle::imperative::LayoutAutoTune::Instance().GetDefaultLayout();
+  auto first_layout = tensors_vector[0][0].layout();
+  if (NeedTransLayout(tensors_vector, first_layout)) {
+    bool need_trans_back = false;
+    for (size_t i = 0; i < tensors_vector.size(); i++) {
+      for (size_t idx = 0; idx < tensors_vector[0].size(); idx++) {
+        if (4 != tensors_vector[i][idx].shape().size()) {
+          need_trans_back = true;
+          VLOG(3) << "Agnostic op " << op_name << " shape is "
+                  << tensors_vector[i][idx].shape().size()
+                  << " and layout is " << tensors_vector[i][idx].layout();
+        }
+      }
+    }
+    auto final_layout = need_trans_back ? default_layout : desired_layout;
+    return std::make_shared<EagerLayoutTransformer>(
+        op_name, tensors_vector, final_layout);
+  }
+  return BaseTransformer(op_name, tensors_vector);
+}
 
 // For lightly op like reduce
 template <typename T>
 inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(

@@ -40,16 +85,11 @@ inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
     const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                                kSlotSmallVectorSize>& tensors_vector,
     T* attr) {
-  VLOG(3) << "Lightly op " << op_name << "'s shape is "
-          << tensors_vector[0][0].shape().size() << " and layout is "
-          << tensors_vector[0][0].layout();
   std::shared_ptr<EagerLayoutTransformer> transposer = nullptr;
   bool unstart =
       (paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout() ==
       paddle::experimental::DataLayout::UNDEFINED);
   if (unstart) {
     VLOG(3) << "Optimze Layout was not started" << op_name;
     transposer =
         std::make_shared<EagerLayoutTransformer>(op_name, tensors_vector);
     return transposer;
   }
   transposer =
       std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
   return transposer;

@@ -63,33 +103,30 @@ inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
                                kSlotSmallVectorSize>& tensors_vector,
     T1* axis,
     T2* keep_dim) {
-  VLOG(3) << "Lightly op " << op_name << "'s shape is "
-          << tensors_vector[0][0].shape().size() << " and layout is "
-          << tensors_vector[0][0].layout();
   return EagerLayoutAutotune<T1>(op_name, tensors_vector, axis);
 }
 
-// heavily string data_format data_layout
+// heavily string data_format, data_layout
 template <>
 inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
     const std::string& op_name,
     const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                                kSlotSmallVectorSize>& tensors_vector,
     std::string* attr) {
   VLOG(3) << " Optimze Layout heavily op: " << op_name;
-  auto transposer =
-      std::make_shared<EagerLayoutTransformer>(op_name, tensors_vector);
+  auto first_layout = tensors_vector[0][0].layout();
+  auto transposer = std::make_shared<EagerLayoutTransformer>(
+      op_name, tensors_vector, first_layout);
   if (paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout() ==
       paddle::experimental::DataLayout::UNDEFINED) {
     // Layout autotune only supports model with convolutional layers
-    VLOG(3) << "Optimze Layout was not started" << op_name;
+    VLOG(3) << "Optimze Layout was not started " << op_name;
     if (op_name != "conv2d") {
       return transposer;
     } else {
 #if defined(PADDLE_WITH_CUDA)
       if (paddle::platform::is_gpu_place(tensors_vector[0][0].place()) &&
           !phi::backends::gpu::TensorCoreAvailable()) {
         paddle::imperative::LayoutAutoTune::Instance().DisableLayoutAutoTune();
         return transposer;
       }
 #endif
       auto data_type = tensors_vector[0][0].dtype();
       bool is_tune_fp32 =
           (data_type == paddle::experimental::DataType::FLOAT32) &&

@@ -97,6 +134,7 @@ inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
       bool is_tune_fp16 =
           (data_type == paddle::experimental::DataType::FLOAT16) &&
           (*attr == "NCHW");
+      VLOG(3) << "Conv2d_dy's dtype " << data_type << " format" << (*attr);
       if (is_tune_fp32) {
         paddle::imperative::LayoutAutoTune::Instance().SetDesiredLayout(
             paddle::experimental::DataLayout::NCHW);

@@ -109,26 +147,27 @@ inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
         paddle::imperative::LayoutAutoTune::Instance().SetDefaultLayout(
             paddle::experimental::DataLayout::NCHW);
       } else {
-        paddle::imperative::LayoutAutoTune::Instance().DisableLayoutAutoTune();
+        egr::Controller::Instance().DisableLayoutAutoTune();
        return transposer;
       }
-      VLOG(3) << "Tune the layout from " << attr << " to "
-              << paddle::framework::DataLayoutToString(
-                     paddle::imperative::LayoutAutoTune::Instance()
-                         .GetDesiredLayout());
+      VLOG(3) << "Tune the layout from " << *attr << " to "
+              << paddle::imperative::LayoutAutoTune::Instance()
+                     .GetDesiredLayout();
     }
   }
 
   if (paddle::imperative::LayoutAutoTune::Instance().IsHeavilyLayoutSensitive(
           op_name)) {
     VLOG(3) << op_name
             << "'s LayoutTransformer is EagerHeavilyLayoutSensitiveOpTransformer";
     auto heavily_transposer =
         std::make_shared<EagerHeavilyLayoutSensitiveOpTransformer>(op_name,
                                                                    attr);
     return heavily_transposer;
   }
-  VLOG(3) << op_name << "'s LayoutTransformer is unimplemented. Use default "
-             "LayoutTransformer instead.";
+  VLOG(3) << op_name << "'s LayoutTransformer is unimplemented. Use default.";
   return transposer;
 }

@@ -139,24 +178,23 @@ inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
     const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                                kSlotSmallVectorSize>& tensors_vector,
     std::vector<int>* attr) {
+  auto first_layout = tensors_vector[0][0].layout();
   std::shared_ptr<EagerLayoutTransformer> transposer = nullptr;
   if (paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout() ==
       paddle::experimental::DataLayout::UNDEFINED) {
-    VLOG(3) << " Optimze Layout Unstarted : " << op_name;
-    transposer =
-        std::make_shared<EagerLayoutTransformer>(op_name, tensors_vector);
+    VLOG(3) << " Optimze Layout was not started " << op_name;
+    transposer = std::make_shared<EagerLayoutTransformer>(
+        op_name, tensors_vector, first_layout);
     return transposer;
   }
   VLOG(3) << " Optimze Layout lightly op: " << op_name;
-  if (op_name == "transpose2") {
+  if (op_name == "transpose2" &&
+      (tensors_vector[0][0].layout() ==
+       paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout())) {
     auto trans = std::make_shared<EagerTransposeOpTransformer>(op_name);
-    if (tensors_vector[0][0].layout() ==
-        paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout()) {
-      trans->SetAttr(attr,
-                     tensors_vector[0][0].layout() ==
-                         paddle::experimental::DataLayout::NHWC);
-      return trans;
-    }
+    trans->SetAttr(attr,
+                   tensors_vector[0][0].layout() ==
+                       paddle::experimental::DataLayout::NHWC);
     return trans;
   }
   transposer =
       std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);

@@ -172,33 +210,32 @@ EagerLayoutAutotune<paddle::experimental::Scalar, bool>(
                                kSlotSmallVectorSize>& tensors_vector,
     paddle::experimental::Scalar* axis,
     bool* keep_dim) {
+  auto first_layout = tensors_vector[0][0].layout();
   std::shared_ptr<EagerLayoutTransformer> transposer = nullptr;
   if (paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout() ==
       paddle::experimental::DataLayout::UNDEFINED) {
-    VLOG(3) << " Optimze Layout Unstarted : " << op_name;
-    transposer =
-        std::make_shared<EagerLayoutTransformer>(op_name, tensors_vector);
+    VLOG(3) << " Optimze Layout was not started " << op_name;
+    transposer = std::make_shared<EagerLayoutTransformer>(
+        op_name, tensors_vector, first_layout);
     return transposer;
   }
   auto desired_layout =
       paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
-  if (op_name == "argmax") {
+  if (op_name == "argmax" &&
+      (tensors_vector[0][0].layout() == desired_layout) && (*keep_dim)) {
     std::shared_ptr<EagerArgmaxOpTransformer> argmax_transform = nullptr;
     argmax_transform = std::make_shared<EagerArgmaxOpTransformer>(op_name);
-    if ((tensors_vector[0][0].layout() == desired_layout) && (*keep_dim)) {
-      argmax_transform->SetAttr(axis,
-                                tensors_vector[0][0].layout() ==
-                                    paddle::experimental::DataLayout::NHWC);
-      return argmax_transform;
-    }
+    argmax_transform->SetAttr(axis,
+                              tensors_vector[0][0].layout() ==
+                                  paddle::experimental::DataLayout::NHWC);
     return argmax_transform;
   }
   VLOG(3) << " Optimze Layout lightly op: " << op_name;
   transposer =
       std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
   return transposer;
 }
 
-// lightly int flatten
+// lightly for flatten
 template <>
 inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune<int, int>(
     const std::string& op_name,

@@ -206,17 +243,17 @@ inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune<int, int>(
                                kSlotSmallVectorSize>& tensors_vector,
     int* start_axis,
     int* stop_axis) {
+  auto first_layout = tensors_vector[0][0].layout();
   std::shared_ptr<EagerLayoutTransformer> transposer = nullptr;
-  if (paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout() ==
-      paddle::experimental::DataLayout::UNDEFINED) {
-    VLOG(3) << " Optimze Layout Unstarted : " << op_name;
-    transposer =
-        std::make_shared<EagerLayoutTransformer>(op_name, tensors_vector);
+  auto desired_layout =
+      paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
+  if (desired_layout == paddle::experimental::DataLayout::UNDEFINED) {
+    VLOG(3) << "Optimze Layout was not started" << op_name;
+    transposer = std::make_shared<EagerLayoutTransformer>(
+        op_name, tensors_vector, first_layout);
     return transposer;
   }
-  bool no_tranpose =
-      tensors_vector[0][0].layout() ==
-      paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
+  bool no_tranpose = tensors_vector[0][0].layout() == desired_layout;
   bool is_valid = ((*start_axis) == 1 && (*stop_axis) == 3);
   if (op_name == "flatten" || op_name == "flatten_contiguous_range") {
     if (no_tranpose && is_valid) {

@@ -226,15 +263,13 @@ inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune<int, int>(
     }
   }
   VLOG(3) << " Optimze Layout lightly op: " << op_name;
   transposer =
       std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
   return transposer;
 }
 
-// lightly int Concat
-// lightly T can be int vector<int> vector<int64_t> IntArray
-template <>
+// default int
+template <>
 inline std::shared_ptr<EagerLayoutTransformer>
 EagerLayoutAutotune<paddle::experimental::Scalar>(
     const std::string& op_name,

@@ -243,30 +278,21 @@ EagerLayoutAutotune<paddle::experimental::Scalar>(
     paddle::experimental::Scalar* axis) {
   auto desired_layout =
       paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
+  auto first_layout = tensors_vector[0][0].layout();
   std::shared_ptr<EagerLayoutTransformer> transposer = nullptr;
   if (desired_layout == paddle::experimental::DataLayout::UNDEFINED) {
-    VLOG(3) << " Optimze Layout Unstarted : " << op_name;
-    transposer =
-        std::make_shared<EagerLayoutTransformer>(op_name, tensors_vector);
+    VLOG(3) << " Optimze Layout was not started " << op_name;
+    transposer = std::make_shared<EagerLayoutTransformer>(
+        op_name, tensors_vector, first_layout);
     return transposer;
   }
-  bool need_transpose = false;
-  for (size_t i = 0; i < tensors_vector.size(); i++) {
-    for (size_t idx = 0; idx < tensors_vector[0].size(); idx++) {
-      if (desired_layout != tensors_vector[i][idx].layout()) {
-        need_transpose = true;
-      }
-    }
-  }
-  if (need_transpose) {
-    VLOG(3) << "Concat need transpose to NCHW " << op_name;
+  if (NeedTransLayout(tensors_vector, desired_layout)) {
+    VLOG(3) << op_name << " need transpose to default layout";
    transposer =
        std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
    return transposer;
   } else {
     VLOG(3) << " Optimze Layout lightly op: " << op_name;
     auto trans = std::make_shared<EagerConcatOpTransformer>(op_name);
     trans->SetAttr(axis, desired_layout);
     return trans;
paddle/fluid/eager/eager_layout_transformer.h

@@ -22,9 +22,9 @@ namespace egr {
 inline paddle::experimental::Tensor EagerTraceTransposeOp(
     const paddle::experimental::DataLayout layout,
     const paddle::experimental::Tensor& in) {
   VLOG(4) << "AutoTune Transpose from " << in.layout() << " to " << layout
           << ", tensor's shape is " << in.shape().size();
   if (in.shape().size() != 4) {
     VLOG(4) << "Shape is " << in.shape().size() << " can't transpose to"
             << paddle::framework::DataLayoutToString(layout);
     return in;
   }
   std::vector<int> axis;

@@ -44,77 +44,75 @@ inline paddle::experimental::Tensor EagerTraceTransposeOp(
 // agnostic op
 class EagerLayoutTransformer {
   using Layout = paddle::experimental::DataLayout;
 
  public:
-  EagerLayoutTransformer() : op_name_("") {}
-  explicit EagerLayoutTransformer(
-      const std::string& op_name,
-      const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
-                                 kSlotSmallVectorSize>& tensors_vector)
-      : op_name_(op_name) {
-    final_layout_ = "UNDEFINED";
-    auto desired_layout =
-        paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
-    for (size_t i = 0; i < tensors_vector.size(); i++) {
-      for (size_t idx = 0; idx < tensors_vector[0].size(); idx++) {
-        if (final_layout_ == "UNDEFINED") {
-          final_layout_ = paddle::framework::DataLayoutToString(
-              tensors_vector[0][0].layout());
-        } else if (tensors_vector[i][idx].layout() == desired_layout) {
-          final_layout_ = paddle::framework::DataLayoutToString(desired_layout);
-          break;
-        }
-      }
-    }
-    VLOG(4) << op_name_ << "final_layout_ is " << final_layout_;
-  }
+  EagerLayoutTransformer() : op_name_(""), final_layout_(Layout::UNDEFINED) {}
+
+  EagerLayoutTransformer(const EagerLayoutTransformer&) = delete;
+
+  EagerLayoutTransformer& operator=(const EagerLayoutTransformer&) = delete;
+
+  explicit EagerLayoutTransformer(
+      const std::string& op_name,
+      const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
+                                 kSlotSmallVectorSize>& tensors_vector,
+      const Layout final_layout = Layout::UNDEFINED)
+      : op_name_(op_name), final_layout_(final_layout) {
+    VLOG(4) << "Agnostic op : " << op_name_ << " final_layout_ is "
+            << final_layout_;
+  }
 
   virtual ~EagerLayoutTransformer() {}
 
-  virtual paddle::optional<paddle::experimental::Tensor> TransInTensor(
-      const std::string& in_name,
-      const paddle::optional<paddle::experimental::Tensor>& in) {
-    VLOG(4) << op_name_ << "is is agnostic, final_layout_ is " << final_layout_;
-    return in;
+  virtual paddle::experimental::Tensor TransInTensor(
+      const std::string& in_name, const paddle::experimental::Tensor& in) {
+    if (final_layout_ == Layout::UNDEFINED || final_layout_ == in.layout()) {
+      VLOG(4) << "EagerLayoutTransformer with no trans";
+      return in;
+    } else {  // from NCHW to NHWC
+      VLOG(4) << "EagerLayoutTransformer with trans from " << in.layout()
+              << " to " << final_layout_;
+      auto out_tensor = EagerTraceTransposeOp(final_layout_, in);
+      phi::DenseTensorUtils::GetMutableMeta(
+          static_cast<phi::DenseTensor*>(out_tensor.impl().get()))
+          ->layout = final_layout_;
+      return out_tensor;
+    }
   }
 
-  virtual paddle::optional<std::vector<paddle::experimental::Tensor>>
-  TransInTensor(
+  virtual paddle::optional<paddle::experimental::Tensor> TransInTensor(
       const std::string& in_name,
-      const paddle::optional<std::vector<paddle::experimental::Tensor>>& in) {
-    return in;
+      const paddle::optional<paddle::experimental::Tensor>& in) {
+    return in ? TransInTensor(in_name, *in) : in;
   }
 
-  virtual std::vector<paddle::experimental::Tensor> TransInTensor(
+  virtual std::vector<paddle::experimental::Tensor> TransInTensors(
       const std::string& in_name,
      const std::vector<paddle::experimental::Tensor>& in) {
    VLOG(4) << " TransInTensor";
    return in;
  }
 
-  virtual paddle::experimental::Tensor TransInTensor(
-      const std::string& in_name, const paddle::experimental::Tensor& in) {
+  virtual paddle::optional<std::vector<paddle::experimental::Tensor>>
+  TransInTensors(
+      const std::string& in_name,
+      const paddle::optional<std::vector<paddle::experimental::Tensor>>& in) {
+    VLOG(4) << " TransInTensor";
+    if (in) {
+      return TransInTensors(in_name, *in);
+    }
     return in;
   }
 
-  virtual void SetOutTensorLayout(paddle::experimental::Tensor* out_tensor) {
-    bool use_default = (final_layout_ == "Undefined(AnyLayout)" ||
-                        final_layout_ == ("UNDEFINED"));
-    auto layout = paddle::framework::StringToDataLayout(final_layout_);
-    if (!use_default) {
-      phi::DenseTensorUtils::GetMutableMeta(
-          static_cast<phi::DenseTensor*>(out_tensor->impl().get()))
-          ->layout = layout;
-    }
-    VLOG(4) << op_name_ << "is is agnostic, use_default " << use_default;
+  virtual void SetOutTensorLayout(
+      paddle::optional<paddle::experimental::Tensor>* out_tensor) {
+    VLOG(4) << "optional out_tensor";
   }
 
   virtual void SetOutTensorLayout(
       std::vector<paddle::experimental::Tensor>* out_tensor) {
-    bool use_default = (final_layout_ == "Undefined(AnyLayout)" ||
-                        final_layout_ == ("UNDEFINED"));
+    bool use_default = (final_layout_ == Layout::UNDEFINED);
     if (!use_default) {
       for (size_t i = 0; i < out_tensor->size(); i++) {
         phi::DenseTensorUtils::GetMutableMeta(

@@ -126,9 +124,24 @@ class EagerLayoutTransformer {
     VLOG(4) << op_name_ << "is is agnostic, use_default " << use_default;
   }
 
+  virtual void SetOutTensorLayout(
+      paddle::optional<std::vector<paddle::experimental::Tensor>>* out_tensor) {
+    VLOG(4) << "optional out_tensor";
+  }
+
+  virtual void SetOutTensorLayout(paddle::experimental::Tensor* out_tensor) {
+    bool use_default = final_layout_ == Layout::UNDEFINED;
+    if (!use_default) {
+      phi::DenseTensorUtils::GetMutableMeta(
+          static_cast<phi::DenseTensor*>(out_tensor->impl().get()))
+          ->layout = final_layout_;
+    }
+    VLOG(4) << op_name_ << "is is agnostic, use_default " << use_default;
+  }
+
  protected:
   std::string op_name_;
-  std::string final_layout_;
+  const Layout final_layout_;
 };
 
 class EagerHeavilyLayoutSensitiveOpTransformer : public EagerLayoutTransformer {

@@ -145,21 +158,6 @@ class EagerHeavilyLayoutSensitiveOpTransformer : public EagerLayoutTransformer {
     }
   }
 
-  virtual paddle::optional<std::vector<paddle::experimental::Tensor>>
-  TransInTensor(
-      const std::string& in_name,
-      const paddle::optional<std::vector<paddle::experimental::Tensor>>& in) {
-    VLOG(4) << op_name_ << "is is heavily";
-    return in;
-  }
-
-  virtual paddle::optional<paddle::experimental::Tensor> TransInTensor(
-      const std::string& in_name,
-      const paddle::optional<paddle::experimental::Tensor>& in) {
-    VLOG(4) << op_name_ << "is is heavily";
-    return in;
-  }
-
   paddle::experimental::Tensor TransInTensor(
       const std::string& in_name, const paddle::experimental::Tensor& in) {
     if (heavily_input_.count(in_name) != 0 && in.layout() != desired_layout_) {

@@ -230,7 +228,6 @@ class EagerLightlyLayoutSensitiveOpTransformer : public EagerLayoutTransformer {
         paddle::framework::DataLayoutToString(in.layout());
-    auto default_layout =
-        paddle::imperative::LayoutAutoTune::Instance().GetDefaultLayout();
     if (final_layout_ == input_layout && in.shape().size() == 4) {
       VLOG(4) << op_name_ << "'s " << in_name << " need transpose from "
               << input_layout << " to default_layout";

@@ -245,7 +242,7 @@ class EagerLightlyLayoutSensitiveOpTransformer : public EagerLayoutTransformer {
     return in;
   }
 
-  virtual std::vector<paddle::experimental::Tensor> TransInTensor(
+  virtual std::vector<paddle::experimental::Tensor> TransInTensors(
       const std::string& in_name,
       const std::vector<paddle::experimental::Tensor>& in) {
     std::vector<paddle::experimental::Tensor> result;

@@ -340,22 +337,19 @@ class EagerTransposeOpTransformer
   paddle::experimental::Tensor TransInTensor(
       const std::string& in_name, const paddle::experimental::Tensor& in) {
     VLOG(4) << "with no transpose: EagerTransposeOpTransformer " << in_name
             << "'s layout is "
             << paddle::framework::DataLayoutToString(in.layout());
     return in;
   }
 
   void SetOutTensorLayout(paddle::experimental::Tensor* out_tensor) {
-    auto desired_layout =
-        paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
-    if (out_tensor->layout() != desired_layout) {
+    auto default_layout =
+        paddle::imperative::LayoutAutoTune::Instance().GetDefaultLayout();
+    if (out_tensor->layout() != default_layout) {
       VLOG(4) << " Set Out_tensor's layout from "
               << paddle::framework::DataLayoutToString(out_tensor->layout())
-              << " to " << final_layout_;
+              << " to " << default_layout;
       phi::DenseTensorUtils::GetMutableMeta(
           static_cast<phi::DenseTensor*>(out_tensor->impl().get()))
-          ->layout = desired_layout;
+          ->layout = default_layout;
     }
   }

@@ -385,15 +379,15 @@ class EagerArgmaxOpTransformer
   void SetOutTensorLayout(paddle::experimental::Tensor* out_tensor) {
     VLOG(4) << "EagerArgmaxOpTransformer's out layout is"
             << paddle::framework::DataLayoutToString(out_tensor->layout());
-    auto desired_layout =
-        paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
-    if (desired_layout != out_tensor->layout()) {
+    auto default_layout =
+        paddle::imperative::LayoutAutoTune::Instance().GetDefaultLayout();
+    if (default_layout != out_tensor->layout()) {
       VLOG(4) << "Change layout from "
               << paddle::framework::DataLayoutToString(out_tensor->layout())
-              << " to " << final_layout_;
+              << " to " << default_layout;
       phi::DenseTensorUtils::GetMutableMeta(
           static_cast<phi::DenseTensor*>(out_tensor->impl().get()))
-          ->layout = desired_layout;
+          ->layout = default_layout;
     }
   }

@@ -410,11 +404,11 @@ class EagerFlattenOpTransformer
   explicit EagerFlattenOpTransformer(const std::string& op_name)
       : op_name_(op_name) {
     VLOG(3) << "Optimze Layout lightly " << op_name;
-    auto desired_layout =
-        paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
-    std::string desired_layout_str =
-        paddle::framework::DataLayoutToString(desired_layout);
-    final_layout_ = desired_layout_str;
+    auto default_layout =
+        paddle::imperative::LayoutAutoTune::Instance().GetDefaultLayout();
+    std::string default_layout_str =
+        paddle::framework::DataLayoutToString(default_layout);
+    final_layout_ = default_layout_str;
   }
 
   // transpose from NHWC to NCHW

@@ -424,16 +418,17 @@ class EagerFlattenOpTransformer
   }
 
   void SetOutTensorLayout(paddle::experimental::Tensor* out_tensor) {
-    VLOG(4) << "EagerArgmaxOpTransformer's out layout is"
+    VLOG(4) << "EagerFlattenOpTransformer's out layout is"
             << paddle::framework::DataLayoutToString(out_tensor->layout());
-    auto layout = paddle::framework::StringToDataLayout(final_layout_);
-    if (layout != out_tensor->layout()) {
+    auto desired_layout =
+        paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
+    if (desired_layout != out_tensor->layout()) {
       VLOG(4) << "Change layout from "
              << paddle::framework::DataLayoutToString(out_tensor->layout())
-             << " to " << final_layout_;
+             << " to " << desired_layout;
      phi::DenseTensorUtils::GetMutableMeta(
          static_cast<phi::DenseTensor*>(out_tensor->impl().get()))
-          ->layout = layout;
+          ->layout = desired_layout;
     }
   }

@@ -450,11 +445,11 @@ class EagerConcatOpTransformer
   explicit EagerConcatOpTransformer(const std::string& op_name)
      : op_name_(op_name) {
     VLOG(3) << "Optimze Layout lightly " << op_name;
-    auto desired_layout =
-        paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
-    std::string desired_layout_str =
-        paddle::framework::DataLayoutToString(desired_layout);
-    final_layout_ = desired_layout_str;
+    auto default_layout =
+        paddle::imperative::LayoutAutoTune::Instance().GetDefaultLayout();
+    std::string default_layout_str =
+        paddle::framework::DataLayoutToString(default_layout);
+    final_layout_ = default_layout_str;
   }
 
   void SetAttr(paddle::experimental::Scalar* axis,

@@ -467,7 +462,7 @@ class EagerConcatOpTransformer
     (*axis) = static_cast<paddle::experimental::Scalar>(perm[axes]);
   }
 
-  virtual std::vector<paddle::experimental::Tensor> TransInTensor(
+  virtual std::vector<paddle::experimental::Tensor> TransInTensors(
      const std::string& in_name,
      const std::vector<paddle::experimental::Tensor>& in) {
    return in;
paddle/fluid/imperative/layout_autotune.cc

@@ -14,23 +14,15 @@
 #include "paddle/fluid/imperative/layout_autotune.h"
 #include "paddle/fluid/eager/api/utils/global_utils.h"
 #include "paddle/fluid/framework/op_info.h"
 #include "paddle/fluid/imperative/layout_transformer.h"
 #include "paddle/phi/backends/gpu/gpu_info.h"
 #include "paddle/phi/core/enforce.h"
 #include "paddle/phi/core/errors.h"
 
 namespace paddle {
 namespace imperative {
 
-bool LayoutAutoTune::UseLayoutAutoTune() const {
-#if defined(PADDLE_WITH_CUDA)
-  return use_layout_autotune_;
-#else
-  return false;
-#endif
-}
-
 LayoutAutoTune::LayoutAutoTune() {
   const auto& op_info = paddle::framework::OpInfoMap::Instance().map();
   for (auto it = op_info.begin(); it != op_info.end(); it++) {

@@ -140,6 +132,26 @@ paddle::imperative::NameVarMap<VarType> DealLightlyLayoutSensitive(
   return transposer->Apply(ins, outs, attrs, tracer);
 }
 
+LayoutAutotuneGuard::LayoutAutotuneGuard(std::shared_ptr<Tracer> tracer,
+                                         bool use_autotune)
+    : tracer_(tracer) {
+  pre_layout_autotune_ = tracer_->UseLayoutAutoTune();
+  if (pre_layout_autotune_ != use_autotune) {
+    tracer_->EnableLayoutAutoTune();
+    if (!use_autotune) {
+      tracer_->DisableLayoutAutoTune();
+    }
+  }
+}
+
+LayoutAutotuneGuard::~LayoutAutotuneGuard() {
+  if (pre_layout_autotune_) {
+    tracer_->EnableLayoutAutoTune();
+  } else {
+    tracer_->DisableLayoutAutoTune();
+  }
+}
+
 template <typename VarType>
 paddle::imperative::NameVarMap<VarType> AutoTuneLayout(
     const std::string& op_type,

@@ -147,7 +159,7 @@ paddle::imperative::NameVarMap<VarType> AutoTuneLayout(
     const paddle::imperative::NameVarMap<VarType>& outs,
     paddle::framework::AttributeMap* attrs,
     const std::shared_ptr<imperative::Tracer>& tracer) {
-  if (!LayoutAutoTune::Instance().UseLayoutAutoTune()) {
+  if (!tracer->UseLayoutAutoTune()) {
     return ins;
   }
   // When layout autotuning is enabled, the tuner will check the desired layout.

@@ -165,7 +177,7 @@ paddle::imperative::NameVarMap<VarType> AutoTuneLayout(
   } else {
 #if defined(PADDLE_WITH_CUDA)
     if (!phi::backends::gpu::TensorCoreAvailable()) {
-      LayoutAutoTune::Instance().DisableLayoutAutoTune();
+      tracer->DisableLayoutAutoTune();
       return ins;
     }
 #endif

@@ -185,7 +197,7 @@ paddle::imperative::NameVarMap<VarType> AutoTuneLayout(
     } else if (is_tune_fp16) {
       LayoutAutoTune::Instance().SetDesiredLayout(DataLayout::NHWC);
     } else {
-      LayoutAutoTune::Instance().DisableLayoutAutoTune();
+      tracer->DisableLayoutAutoTune();
      return ins;
     }
     VLOG(3) << "Tune the layout from "
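For reference, a minimal usage sketch of the RAII guard implemented above (illustrative, not part of the diff): the constructor records the tracer's current autotune setting and forces the requested one, and the destructor restores the recorded setting when the scope ends.

    {
      // Force layout autotune off for this scope; the previous
      // Tracer::UseLayoutAutoTune() value is restored on scope exit.
      paddle::imperative::LayoutAutotuneGuard guard(
          egr::Controller::Instance().GetCurrentTracer(), /*use_autotune=*/false);
      // ... trace inner ops here without re-entering layout autotune ...
    }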
paddle/fluid/imperative/layout_autotune.h

@@ -19,8 +19,8 @@
 #include <unordered_set>
 #include "paddle/fluid/framework/type_defs.h"
+#include "paddle/fluid/imperative/tracer.h"
 #include "paddle/phi/common/layout.h"
 
 namespace paddle {
 namespace imperative {

@@ -35,12 +35,6 @@ class LayoutAutoTune {
     return layout_autoTune;
   }
 
-  bool UseLayoutAutoTune() const;
-
-  void EnableLayoutAutoTune() { use_layout_autotune_ = true; }
-
-  void DisableLayoutAutoTune() { use_layout_autotune_ = false; }
-
   bool IsHeavilyLayoutSensitive(const std::string& op_type) const {
     return heavily_layout_sensitive_ops_.count(op_type) != 0;
   }

@@ -64,8 +58,6 @@ class LayoutAutoTune {
  private:
   LayoutAutoTune();
 
-  bool use_layout_autotune_{false};
-
   std::unordered_set<std::string> layout_agnostic_ops_{};
 
   std::unordered_set<std::string> heavily_layout_sensitive_ops_{"batch_norm"};

@@ -73,11 +65,29 @@ class LayoutAutoTune {
   std::unordered_set<std::string> lightly_layout_sensitive_ops_{
       "instance_norm", "softmax", "transpose", "transpose2", "reshape2"};
 
   // Best Layout in this platform
   DataLayout desired_layout_{DataLayout::UNDEFINED};
 
   // Default Layout in this model
   DataLayout default_layout_{DataLayout::UNDEFINED};
 };
 
+// LayoutAutotuneGuard is used for RAII.
+class LayoutAutotuneGuard {
+ public:
+  LayoutAutotuneGuard(std::shared_ptr<Tracer> tracer, bool use_autotune);
+
+  ~LayoutAutotuneGuard();
+
+  // forbid copy and operator=
+  LayoutAutotuneGuard(const LayoutAutotuneGuard& guard) = delete;
+  LayoutAutotuneGuard& operator=(const LayoutAutotuneGuard& guard) = delete;
+
+ private:
+  std::shared_ptr<Tracer> tracer_;
+  bool pre_layout_autotune_;
+};
+
 template <typename VarType>
 paddle::imperative::NameVarMap<VarType> AutoTuneLayout(
     const std::string& op_type,
paddle/fluid/imperative/layout_transformer.h

@@ -19,8 +19,24 @@
 #include "paddle/fluid/imperative/var_helper.h"
 #include "paddle/phi/core/enforce.h"
 #include "paddle/phi/core/errors.h"
+#include "paddle/phi/core/tensor_utils.h"
 
 namespace paddle {
 namespace imperative {
 
+template <typename VarType>
+void SetOutDataLayout(std::shared_ptr<VarType> var,
+                      const paddle::experimental::DataLayout layout) {
+  if (var != nullptr) {
+    paddle::imperative::SetDataLayout(var, layout);
+    // set out_tensor's layout
+    if (var->MutableVar()->IsInitialized()) {
+      paddle::framework::Variable* tmp_var = var->MutableVar();
+      auto* out = tmp_var->GetMutable<framework::LoDTensor>();
+      phi::DenseTensorUtils::GetMutableMeta(
+          static_cast<framework::LoDTensor*>(out))
+          ->layout = layout;
+    }
+  }
+}
+
 template <typename VarType>
 std::shared_ptr<VarType> TraceTransposeOp(

@@ -118,7 +134,7 @@ class LayoutTransformer {
     auto out_vars = outs.at(name);
     for (auto& var : out_vars) {
       if (var != nullptr) {
-        paddle::imperative::SetDataLayout(var, layout);
+        paddle::imperative::SetOutDataLayout(var, layout);
       }
     }
     not_in_out = false;

@@ -130,7 +146,7 @@ class LayoutTransformer {
     for (auto& pair : outs) {
       for (auto& var : pair.second) {
         if (var != nullptr) {
-          paddle::imperative::SetDataLayout(var, layout);
+          paddle::imperative::SetOutDataLayout(var, layout);
         }
       }
     }
paddle/fluid/imperative/tracer.cc

@@ -42,6 +42,8 @@ thread_local bool Tracer::enable_program_desc_tracing_ = false;
 
 thread_local bool Tracer::has_grad_ = true;
 
+thread_local bool Tracer::use_layout_autotune_ = false;
+
 thread_local AmpLevel Tracer::amp_level_ = AmpLevel::O0;
 
 thread_local phi::DataType Tracer::amp_dtype_ = phi::DataType::FLOAT32;
paddle/fluid/imperative/tracer.h

@@ -28,9 +28,9 @@
 #include "paddle/fluid/imperative/basic_engine.h"
 #include "paddle/fluid/imperative/jit/program_desc_tracer.h"
 #include "paddle/fluid/imperative/layer.h"
-#include "paddle/fluid/imperative/layout_autotune.h"
 #include "paddle/fluid/platform/macros.h"
 #include "paddle/phi/core/compat/arg_map_context.h"
 
 namespace paddle {
 namespace imperative {

@@ -184,6 +184,20 @@ class Tracer {
     }
   }
 
+  void DisableLayoutAutoTune() { use_layout_autotune_ = false; }
+
+  void EnableLayoutAutoTune() { use_layout_autotune_ = true; }
+
+  bool UseLayoutAutoTune() {
+#if defined(PADDLE_WITH_CUDA)
+    if (phi::backends::gpu::TensorCoreAvailable()) {
+      return use_layout_autotune_;
+    }
+#endif
+    use_layout_autotune_ = false;
+    return false;
+  }
+
   phi::KernelSignature GetExpectedKernelSignature(
       const std::string& type,
       const NameTensorMap& ins,

@@ -199,8 +213,8 @@ class Tracer {
   std::unique_ptr<UniqueNameGenerator> generator_;
   platform::Place expected_place_;
   GarbageCollectorMap gcs_;
   static thread_local bool enable_program_desc_tracing_;
+  static thread_local bool use_layout_autotune_;
   static thread_local bool has_grad_;
   static thread_local AmpLevel amp_level_;
   static thread_local phi::DataType amp_dtype_;
paddle/fluid/pybind/pybind.cc

@@ -2503,19 +2503,14 @@ All parameter, weight, gradient are variables in Paddle.
         return res;
       });
 
-  m.def("enable_layout_autotune", [] {
-    return paddle::imperative::LayoutAutoTune::Instance()
-        .EnableLayoutAutoTune();
-  });
+  m.def("enable_layout_autotune",
+        [] { return egr::Controller::Instance().EnableLayoutAutoTune(); });
 
-  m.def("disable_layout_autotune", [] {
-    return paddle::imperative::LayoutAutoTune::Instance()
-        .DisableLayoutAutoTune();
-  });
+  m.def("disable_layout_autotune",
+        [] { return egr::Controller::Instance().DisableLayoutAutoTune(); });
 
-  m.def("use_layout_autotune", [] {
-    return paddle::imperative::LayoutAutoTune::Instance().UseLayoutAutoTune();
-  });
+  m.def("use_layout_autotune",
+        [] { return egr::Controller::Instance().UseLayoutAutoTune(); });
 
   BindFleetWrapper(&m);
   BindIO(&m);
paddle/phi/api/lib/data_transform.cc

@@ -52,9 +52,9 @@ inline bool NeedTransformPlace(const paddle::platform::Place& input,
   return ret;
 }
 
-inline bool NeedTransformLayout(const paddle::platform::Place& place,
-                                const DataLayout& input,
+inline bool NeedTransformLayout(const DataLayout& input,
                                 const DataLayout& target,
+                                const paddle::platform::Place& place,
                                 const TransformFlag& transform_flag) {
   bool ret = transform_flag.need_trans_layout() &&
              (input != DataLayout::ALL_LAYOUT &&

@@ -202,9 +202,9 @@ phi::DenseTensor TransformData(phi::DenseTensor* tensor,
   bool trans_layout = false;
   bool trans_dtype = false;
-  if (NeedTransformLayout(tensor->place(),
-                          tensor->layout(),
+  if (NeedTransformLayout(tensor->layout(),
                           target_args_def.layout,
+                          tensor->place(),
                           transform_flag)) {
     out = TransDataLayout(out, target_args_def.layout);
     trans_layout = true;

@@ -240,9 +240,9 @@ std::shared_ptr<phi::DenseTensor> PrepareData(
           dense_tensor.place(), target_args_def.backend, transform_flag) &&
       !NeedTransformDataType(
           dense_tensor.dtype(), target_args_def.dtype, transform_flag) &&
-      !NeedTransformLayout(dense_tensor.place(),
-                           dense_tensor.layout(),
+      !NeedTransformLayout(dense_tensor.layout(),
                            target_args_def.layout,
+                           dense_tensor.place(),
                            transform_flag))) {
     return std::static_pointer_cast<phi::DenseTensor>(tensor_in);
   }

@@ -277,9 +277,9 @@ std::unique_ptr<std::vector<phi::DenseTensor>> PrepareData(
             tensor_in->place(), target_args_def.backend, transform_flag) &&
         !NeedTransformDataType(
             tensor_in->dtype(), target_args_def.dtype, transform_flag) &&
-        !NeedTransformLayout(tensor_in->place(),
-                             tensor_in->layout(),
+        !NeedTransformLayout(tensor_in->layout(),
                              target_args_def.layout,
+                             tensor_in->place(),
                              transform_flag))) {
       pt_tensors->emplace_back(
          *std::dynamic_pointer_cast<phi::DenseTensor>(tensor_in));
python/paddle/fluid/tests/unittests/test_layout_autotune.py

@@ -46,6 +46,13 @@ class SimpleNet(paddle.nn.Layer):
 
 class LayoutAutoTune(unittest.TestCase):
 
+    def test_config(self):
+        paddle.fluid.core.enable_layout_autotune()
+        if self.use_autoune():
+            self.assertEqual(paddle.fluid.core.use_layout_autotune(), True)
+            paddle.fluid.core.disable_layout_autotune()
+            self.assertEqual(paddle.fluid.core.use_layout_autotune(), False)
+
     def setUp(self):
         self.use_autoune()
python/paddle/nn/functional/conv.py

@@ -130,15 +130,13 @@ def _conv_nd(x,
     if bias is not None:
         channel_dim = channel_dim + len(
             x.shape) if channel_dim < 0 else channel_dim
-        if pre_bias.layout == "NHWC":
-            channel_dim = 3  # last dim
         if isinstance(x, tuple):
             x = x[0]
         if isinstance(bias, tuple):
             bias = bias[0]
         if len(bias.shape) < len(x.shape):
             tmp_bias = _C_ops.reshape(
-                bias, bias.shape +
+                bias, [1 for i in range(channel_dim)] + bias.shape +
                 [1 for i in range(len(x.shape) - channel_dim - 1)])
             return _C_ops.add(pre_bias, tmp_bias)
         else: