PaddlePaddle / Paddle — commit 16439bb9 (unverified)

Update layoutautotune for inplace (#45826)

Authored by niuliling123, committed via GitHub on Sep 19, 2022.
Parent commit: 46e4fb2a

14 changed files with 443 additions and 326 deletions (+443 / -326).
Changed files:

  paddle/fluid/eager/api/manual/eager_manual/forwards/conv2d_fwd_function.cc   +11   -12
  paddle/fluid/eager/api/utils/global_utils.h                                   +17    -0
  paddle/fluid/eager/auto_code_generator/generator/eager_gen.py                 +112  -86
  paddle/fluid/eager/eager_layout_auto_tune.h                                   +112  -86
  paddle/fluid/eager/eager_layout_transformer.h                                 +90   -95
  paddle/fluid/imperative/layout_autotune.cc                                    +24   -12
  paddle/fluid/imperative/layout_autotune.h                                     +19    -9
  paddle/fluid/imperative/layout_transformer.h                                  +18    -2
  paddle/fluid/imperative/tracer.cc                                             +2     -0
  paddle/fluid/imperative/tracer.h                                              +16    -2
  paddle/fluid/pybind/pybind.cc                                                 +6    -11
  paddle/phi/api/lib/data_transform.cc                                          +8     -8
  python/paddle/fluid/tests/unittests/test_layout_autotune.py                   +7     -0
  python/paddle/nn/functional/conv.py                                           +1     -3
paddle/fluid/eager/api/manual/eager_manual/forwards/conv2d_fwd_function.cc

@@ -51,17 +51,17 @@ paddle::experimental::Tensor conv2d_ad_func(
     auto amp_dst_dtype = egr::GetAmpDestDtype(op_name, amp_tensors_vector);

-    auto NEW_input =
+    auto new_input =
         egr::EagerAmpAutoCast("input", input, amp_dst_dtype, op_name);
-    auto NEW_filter =
+    auto new_filter =
         egr::EagerAmpAutoCast("filter", filter, amp_dst_dtype, op_name);

     {
       paddle::imperative::AutoCastGuard guard(
           egr::Controller::Instance().GetCurrentTracer(),
           paddle::imperative::AmpLevel::O0);
-      return conv2d_ad_func(NEW_input,
-                            NEW_filter,
+      return conv2d_ad_func(new_input,
+                            new_filter,
                             strides,
                             paddings,
                             paddding_algorithm,
...
@@ -76,7 +76,7 @@ paddle::experimental::Tensor conv2d_ad_func(
   // Layout autotune
-  if (paddle::imperative::LayoutAutoTune::Instance().UseLayoutAutoTune()) {
+  if (egr::Controller::Instance().UseLayoutAutoTune()) {
     VLOG(5) << "Check and Prepare For LAYOUT";
     paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                          egr::kSlotSmallVectorSize>
...
@@ -85,11 +85,10 @@ paddle::experimental::Tensor conv2d_ad_func(
     auto op_name = phi::TransToFluidOpName("conv2d");
     auto transformer = egr::EagerLayoutAutotune<std::string>(
         op_name, tensors_vector, &data_format);
-    auto NEW_input = transformer->TransInTensor("input", input);
-    bool is_enable_tune =
-        paddle::imperative::LayoutAutoTune::Instance().UseLayoutAutoTune();
-    paddle::imperative::LayoutAutoTune::Instance().DisableLayoutAutoTune();
-    auto out = conv2d_ad_func(NEW_input,
+    auto new_input = transformer->TransInTensor("input", input);
+    bool need_tune = egr::Controller::Instance().UseLayoutAutoTune();
+    egr::Controller::Instance().DisableLayoutAutoTune();
+    auto out = conv2d_ad_func(new_input,
                               filter,
                               strides,
                               paddings,
...
@@ -101,8 +100,8 @@ paddle::experimental::Tensor conv2d_ad_func(
                               workspace_size_MB,
                               exhaustive_search);
     transformer->SetOutTensorLayout(&out);
-    if (is_enable_tune) {
-      paddle::imperative::LayoutAutoTune::Instance().EnableLayoutAutoTune();
+    if (need_tune) {
+      egr::Controller::Instance().EnableLayoutAutoTune();
     }
     // Returns
     return out;
...
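The manual forward above still brackets the nested conv2d_ad_func call by hand: it records the current state in need_tune, disables layout autotune through the Controller so the inner call is not tuned a second time, and re-enables it afterwards. The generated code in eager_gen.py further down switches to a paddle::imperative::LayoutAutotuneGuard for the same purpose. Below is a minimal sketch of such an RAII-style guard, written against a hypothetical AutotuneToggle interface that mirrors the Controller methods added in this commit; it is an illustration, not Paddle's actual LayoutAutotuneGuard.

// Hypothetical toggle interface mirroring the Controller methods in this diff.
struct AutotuneToggle {
  virtual bool UseLayoutAutoTune() = 0;
  virtual void EnableLayoutAutoTune() = 0;
  virtual void DisableLayoutAutoTune() = 0;
  virtual ~AutotuneToggle() = default;
};

// RAII guard: remembers the previous state, forces the requested state,
// and restores the previous state when it leaves scope.
class ScopedLayoutAutotune {
 public:
  ScopedLayoutAutotune(AutotuneToggle* toggle, bool enable)
      : toggle_(toggle), previous_(toggle->UseLayoutAutoTune()) {
    if (enable) {
      toggle_->EnableLayoutAutoTune();
    } else {
      toggle_->DisableLayoutAutoTune();
    }
  }
  ~ScopedLayoutAutotune() {
    if (previous_) {
      toggle_->EnableLayoutAutoTune();
    } else {
      toggle_->DisableLayoutAutoTune();
    }
  }
  ScopedLayoutAutotune(const ScopedLayoutAutotune&) = delete;
  ScopedLayoutAutotune& operator=(const ScopedLayoutAutotune&) = delete;

 private:
  AutotuneToggle* toggle_;
  bool previous_;  // state to restore on scope exit
};

With a guard of this shape the flag is restored even if the wrapped call throws, which an explicit disable/enable pair cannot guarantee.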
paddle/fluid/eager/api/utils/global_utils.h

@@ -55,6 +55,23 @@ class Controller {
   paddle::imperative::AmpLevel GetAMPLevel() const {
     return tracer_->GetAmpLevel();
   }

+  bool UseLayoutAutoTune() {
+    bool use_autotune = false;
+#if defined(PADDLE_WITH_CUDA)
+    auto place = tracer_->ExpectedPlace();
+    bool is_gpu_place = paddle::platform::is_gpu_place(place);
+    if (is_gpu_place) {
+      use_autotune = tracer_->UseLayoutAutoTune();
+    }
+#endif
+    return use_autotune;
+  }
+
+  void DisableLayoutAutoTune() { tracer_->DisableLayoutAutoTune(); }
+
+  void EnableLayoutAutoTune() { tracer_->EnableLayoutAutoTune(); }
+
   bool HasGrad() const { return tracer_->HasGrad(); }
   void SetHasGrad(bool has_grad) { tracer_->SetHasGrad(has_grad); }
   std::string GenerateUniqueName(std::string key = "eager_in_tmp") {
...
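Controller::UseLayoutAutoTune() added above is a thin facade over the tracer: it reports true only when the build has CUDA support, the tracer's expected place is a GPU place, and the tracer's own switch is on, while Enable/DisableLayoutAutoTune simply forward to the tracer. The tracer changes themselves (paddle/fluid/imperative/tracer.h, +16 -2) are not shown in this capture; the sketch below only guesses at the kind of boolean toggle the tracer exposes and is not the actual Tracer code.

// Illustrative stand-ins; not Paddle's real Tracer/Controller.
struct FakeTracer {
  bool use_layout_autotune = false;
  bool is_gpu_place = false;  // stands in for is_gpu_place(ExpectedPlace())
};

// Mirrors the gating of the UseLayoutAutoTune() added above: the flag only
// counts when the code is built with CUDA and the tracer expects a GPU place.
bool UseLayoutAutoTuneLike(const FakeTracer& tracer) {
#if defined(PADDLE_WITH_CUDA)
  return tracer.is_gpu_place && tracer.use_layout_autotune;
#else
  (void)tracer;
  return false;  // layout autotune is never reported active on CPU-only builds
#endif
}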
paddle/fluid/eager/auto_code_generator/generator/eager_gen.py

@@ -437,15 +437,14 @@ AMP_LOGIC_TEMPLATE = \
 """

 LAYOUT_LOGIC_TEMPLATE = \
 """
-  if (paddle::imperative::LayoutAutoTune::Instance().UseLayoutAutoTune()) {{
-    VLOG(5) << "Check and Prepare For LAYOUT";
+  if (egr::Controller::Instance().UseLayoutAutoTune()) {{
     paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> tensors_vector = {};
     {}
     {}
-    paddle::imperative::LayoutAutoTune::Instance().DisableLayoutAutoTune();
+    VLOG(5) << "Check and Prepare For LAYOUT "<< op_name;
+    paddle::imperative::LayoutAutotuneGuard guard(egr::Controller::Instance().GetCurrentTracer(), false);
     {}
     {}
-    paddle::imperative::LayoutAutoTune::Instance().EnableLayoutAutoTune();
     // Returns
     return {};
   }}
...
@@ -922,6 +921,7 @@ class DygraphFunctionGeneratorBase(FunctionGeneratorBase):
             set_grad_in_meta = f"{indent}  grad_node->SetGradInMeta({name}, {pos});"
             set_retain_grad = f"{indent}  egr::EagerUtils::CheckAndRetainGrad({name});"

             set_out_rank_list.append(set_out_rank)
             set_history_list.append(set_history)
             set_grad_in_meta_list.append(set_grad_in_meta)
...
@@ -1014,6 +1014,98 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase):
         self.forward_definition_str = ""
         self.forward_declaration_str = ""

+    def GenerateForwardLayoutAutotune(self, forward_api_name,
+                                      amp_tensors_vector_list,
+                                      layout_tensors_vector_optional_list,
+                                      layout_autotune_list_str,
+                                      returns_type_str, returns_str,
+                                      amp_inputs_call_args_str):
+        intermediate_outputs = self.intermediate_outputs
+        forward_attrs_list = self.forward_attrs_list
+        forward_outputs_position_map = self.forward_outputs_position_map
+        num_outputs = len(
+            forward_outputs_position_map.keys()) - len(intermediate_outputs)
+
+        # for layout autotune attr
+        lightly_sensitive_attr = [
+            'axis', 'axes', 'dim', 'dims', 'start', 'end', 'stop'
+        ]
+        heavily_sensitive_attr = ['data_format', 'data_layout']
+        layout_autotune_attr = []
+        layout_autotune_attr_code_list = []
+        layout_autotune_attr_type_list = []
+        layout_autotune_attr_code_list.append(
+            f"auto op_name = phi::TransToFluidOpName(\"{forward_api_name}\");\n")
+
+        lightly_flag = False
+        heavily_flag = False
+        for name, atype, default_val, pos in forward_attrs_list:
+            for attr_name in lightly_sensitive_attr:
+                if name.find(attr_name) != -1 and (name
+                                                   not in layout_autotune_attr):
+                    lightly_flag = True
+                    layout_autotune_attr.append(name)
+                    layout_autotune_attr_type_list.append(atype)
+            if lightly_flag is False:
+                for attr_name in heavily_sensitive_attr:
+                    if name.find(attr_name) != -1 and (
+                            name not in layout_autotune_attr):
+                        layout_autotune_attr.append(name)
+                        layout_autotune_attr_type_list.append(atype)
+                        heavily_flag = True
+        if len(layout_autotune_attr) == 0:
+            layout_autotune_attr_code_list.append(
+                f"auto transformer = egr::EagerLayoutAutotune(op_name, tensors_vector);\n")
+        elif len(layout_autotune_attr) == 1:
+            layout_autotune_attr_code_list.append(
+                f"auto transformer = egr::EagerLayoutAutotune<{layout_autotune_attr_type_list[0]}>(op_name, tensors_vector, &{layout_autotune_attr[0]});\n")
+        elif len(layout_autotune_attr) == 2:
+            layout_autotune_attr_code_list.append(
+                f"auto transformer = egr::EagerLayoutAutotune<{layout_autotune_attr_type_list[0]}, {layout_autotune_attr_type_list[1]}>(op_name, tensors_vector, &{layout_autotune_attr[0]}, &{layout_autotune_attr[1]});\n")
+        else:
+            layout_autotune_attr_code_list.append(
+                f"auto transformer = egr::EagerLayoutAutotune<{layout_autotune_attr_type_list[0]}>(op_name, tensors_vector,&{layout_autotune_attr[0]});\n")
+
+        # Out tensor
+        layout_inputs_call_args_str = amp_inputs_call_args_str
+        forward_function_name = GetDygraphForwardFunctionName(forward_api_name)
+        layout_tmp_result_list = []
+        layout_autotune_outs_list = []
+        result_name = "api_result"
+        if num_outputs == 1:
+            result_name = returns_str
+            layout_autotune_outs_list.append(
+                f"transformer -> SetOutTensorLayout(&{returns_str});\n")
+        else:
+            for name, (rtype, pos) in forward_outputs_position_map.items():
+                if name in intermediate_outputs:
+                    continue
+                layout_autotune_outs_list.append(
+                    f"    auto& {name} = std::get<{len(layout_tmp_result_list)}>(api_result);\n")
+                layout_autotune_outs_list.append(
+                    f"    transformer -> SetOutTensorLayout(&{name});\n")
+                layout_tmp_result_list.append(f"{name}")
+
+        tensors_vector_list_str = "{ " + ",".join(amp_tensors_vector_list) + " }"
+
+        if len(amp_tensors_vector_list) == 0:
+            layout_logic_str = ""
+        else:
+            after_call_str = f"{returns_type_str} {result_name} = {forward_function_name}({layout_inputs_call_args_str});\n"
+            layout_logic_str = LAYOUT_LOGIC_TEMPLATE.format(
+                tensors_vector_list_str,
+                "    ".join(layout_tensors_vector_optional_list),
+                "    ".join(layout_autotune_attr_code_list) + "    " +
+                layout_autotune_list_str, after_call_str,
+                "    ".join(layout_autotune_outs_list), returns_str)
+
+        return layout_logic_str
+
     def GenerateForwardDefinitionAndDeclaration(self, is_inplaced):
         namespace = self.namespace
         if self.forward_api_name[-1] == '_' and not is_inplaced:
...
@@ -1049,7 +1141,7 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase):
         layout_tensors_vector_optional_list = []
         for name, (ttype, pos) in forward_inputs_position_map.items():
             inputs_call_list[pos] = f"{name}"
-            amp_inputs_call_list[pos] = f"NEW_{name}"
+            amp_inputs_call_list[pos] = f"new_{name}"
             is_optional = (name in optional_inputs)
             if IsPlainTensorType(ttype):
                 if is_optional:
...
@@ -1062,13 +1154,13 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase):
                         f"if ({name}) amp_tensors_vector.push_back({{ *{name} }});\n"
                     )
                     amp_autocast_optional_list.append(
-                        f"auto NEW_{name} = egr::EagerAmpAutoCast(\"{name}\", {name}, amp_dst_dtype, op_name);\n"
+                        f"auto new_{name} = egr::EagerAmpAutoCast(\"{name}\", {name}, amp_dst_dtype, op_name);\n"
                     )
                     layout_tensors_vector_optional_list.append(
                         f"if ({name}) tensors_vector.push_back({{ *{name} }});\n"
                     )
                     layout_autotune_optional_list.append(
-                        f"auto NEW_{name} = transformer->TransInTensor(\"{name}\", {name});\n"
+                        f"auto new_{name} = transformer->TransInTensor(\"{name}\", {name});\n"
                     )
                 else:
                     if is_inplaced and forward_inplace_map and name in forward_inplace_map.keys(
...
@@ -1076,16 +1168,16 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase):
                         arg_str = f"paddle::experimental::Tensor& {name}"
                         amp_tensors_vector_list.append(f"{{ {name} }}")
                         amp_autocast_list.append(
-                            f"auto NEW_{name} = egr::EagerAmpAutoCast(\"{name}\", {name}, amp_dst_dtype, op_name);\n"
+                            f"auto new_{name} = egr::EagerAmpAutoCast(\"{name}\", {name}, amp_dst_dtype, op_name);\n"
                         )
                     else:
                         arg_str = f"const paddle::experimental::Tensor& {name}"
                         amp_tensors_vector_list.append(f"{{ {name} }}")
                         amp_autocast_list.append(
-                            f"auto NEW_{name} = egr::EagerAmpAutoCast(\"{name}\", {name}, amp_dst_dtype, op_name);\n"
+                            f"auto new_{name} = egr::EagerAmpAutoCast(\"{name}\", {name}, amp_dst_dtype, op_name);\n"
                         )
                     layout_autotune_list.append(
-                        f"auto NEW_{name} = transformer->TransInTensor(\"{name}\", {name});\n"
+                        f"auto new_{name} = transformer->TransInTensor(\"{name}\", {name});\n"
                     )
             else:
                 assert IsVectorTensorType(ttype)
...
@@ -1099,10 +1191,10 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase):
                         f"if ({name}) amp_tensors_vector.push_back( *{name} );\n"
                     )
                     amp_autocast_optional_list.append(
-                        f"auto NEW_{name} = egr::EagerAmpAutoCasts(\"{name}\", {name}, amp_dst_dtype, op_name);\n"
+                        f"auto new_{name} = egr::EagerAmpAutoCasts(\"{name}\", {name}, amp_dst_dtype, op_name);\n"
                     )
                     layout_autotune_optional_list.append(
-                        f"auto NEW_{name} = transformer->TransInTensor(\"{name}\", {name});\n"
+                        f"auto new_{name} = transformer->TransInTensors(\"{name}\", {name});\n"
                     )
                 else:
                     if is_inplaced and forward_inplace_map and name in forward_inplace_map.keys(
...
@@ -1112,60 +1204,15 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase):
                         arg_str = f"const std::vector<paddle::experimental::Tensor>& {name}"
                         amp_tensors_vector_list.append(f"{name}")
                         amp_autocast_list.append(
-                            f"auto NEW_{name} = egr::EagerAmpAutoCasts(\"{name}\", {name}, amp_dst_dtype, op_name);\n"
+                            f"auto new_{name} = egr::EagerAmpAutoCasts(\"{name}\", {name}, amp_dst_dtype, op_name);\n"
                         )
                     layout_autotune_list.append(
-                        f"auto NEW_{name} = transformer->TransInTensor(\"{name}\", {name});\n"
+                        f"auto new_{name} = transformer->TransInTensors(\"{name}\", {name});\n"
                     )

             inputs_args_definition_list[pos] = arg_str
             inputs_args_declaration_list[pos] = arg_str

-        # for layout autotune attr
-        lightly_sensitive_attr = [
-            'axis', 'axes', 'dim', 'dims', 'start', 'end', 'stop'
-        ]
-        heavily_sensitive_attr = ['data_format', 'data_layout']
-        layout_autotune_attr = []
-        layout_autotune_attr_code_list = []
-        layout_autotune_attr_type_list = []
-        layout_autotune_attr_code_list.append(
-            f"auto op_name = phi::TransToFluidOpName(\"{forward_api_name}\");\n")
-        lightly_flag = False
-        heavily_flag = False
-        for name, atype, default_val, pos in forward_attrs_list:
-            for attr_name in lightly_sensitive_attr:
-                if name.find(attr_name) != -1 and name not in layout_autotune_attr:
-                    lightly_flag = True
-                    layout_autotune_attr.append(name)
-                    layout_autotune_attr_type_list.append(atype)
-            if lightly_flag is False:
-                for attr_name in heavily_sensitive_attr:
-                    if name.find(attr_name) != -1 and name not in layout_autotune_attr:
-                        layout_autotune_attr.append(name)
-                        layout_autotune_attr_type_list.append(atype)
-                        heavily_flag = True
-        if len(layout_autotune_attr) == 0:
-            layout_autotune_attr_code_list.append(
-                f"auto transformer = egr::EagerLayoutAutotune(op_name, tensors_vector);\n")
-        elif len(layout_autotune_attr) == 1:
-            layout_autotune_attr_code_list.append(
-                f"auto transformer = egr::EagerLayoutAutotune<{layout_autotune_attr_type_list[0]}>(op_name, tensors_vector, &{layout_autotune_attr[0]});\n")
-        elif len(layout_autotune_attr) == 2:
-            layout_autotune_attr_code_list.append(
-                f"auto transformer = egr::EagerLayoutAutotune<{layout_autotune_attr_type_list[0]}, {layout_autotune_attr_type_list[1]}>(op_name, tensors_vector, &{layout_autotune_attr[0]}, &{layout_autotune_attr[1]});\n")
-        else:
-            layout_autotune_attr_code_list.append(
-                f"auto transformer = egr::EagerLayoutAutotune(op_name, tensors_vector, {len(layout_autotune_attr)});\n")

         # forward attrs
         for name, atype, default_val, pos in forward_attrs_list:
             inputs_call_list[pos] = name
...
@@ -1356,33 +1403,12 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase):
             amp_autocast_list_str, amp_call_str)

         # Forward layout autotune
-        layout_inputs_call_args_str = amp_inputs_call_args_str
-        layout_tmp_result_list = []
-        layout_autotune_outs_list = ""
-        if num_outputs == 1:
-            layout_autotune_outs_list += f"{indent}auto {returns_str} = api_result;\n"
-            layout_autotune_outs_list += f"{indent}transformer -> SetOutTensorLayout(&{returns_str});\n"
-        else:
-            for name, (rtype, pos) in forward_outputs_position_map.items():
-                if name in intermediate_outputs:
-                    continue
-                layout_autotune_outs_list += f"{indent}auto& {name} = std::get<{len(layout_tmp_result_list)}>(api_result);\n"
-                layout_autotune_outs_list += f"{indent}transformer -> SetOutTensorLayout(&{name});\n"
-                layout_tmp_result_list.append(f"{name}")
-
-        if returns_type_str == "paddle::experimental::Tensor&" or forward_api_name == "slice" or forward_api_name == "strided_slice" or len(
-                layout_autotune_attr) == 0:
-            layout_logic_str = ""
-        else:
-            # after_call_str = f"return {forward_ad_function_name}({layout_inputs_call_args_str});\n"
-            after_call_str = f"auto api_result = {forward_ad_function_name}({layout_inputs_call_args_str});\n"
-            layout_logic_str = LAYOUT_LOGIC_TEMPLATE.format(
-                amp_tensors_vector_list_str,
-                "        ".join(layout_tensors_vector_optional_list),
-                "        ".join(layout_autotune_attr_code_list) + "        " +
-                "        ".join(layout_autotune_list) +
-                "        ".join(layout_autotune_optional_list), after_call_str,
-                layout_autotune_outs_list, returns_str)
+        layout_autotune_list_str = "    ".join(
+            layout_autotune_list) + "    ".join(layout_autotune_optional_list)
+        layout_logic_str = self.GenerateForwardLayoutAutotune(
+            forward_api_name, amp_tensors_vector_list,
+            layout_tensors_vector_optional_list, layout_autotune_list_str,
+            returns_type_str, returns_str, amp_inputs_call_args_str)

         # For inputs outputs prepare for logging
         var_str = f"\n{indent}  std::string input_str = \"\";"
...
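GenerateForwardLayoutAutotune classifies an op by its attribute names: any attribute containing axis/axes/dim/dims/start/end/stop marks the op as lightly layout sensitive, and only when no such attribute exists do data_format/data_layout attributes mark it as heavily sensitive; ops with neither are treated as layout agnostic. A small self-contained C++ mirror of that classification is sketched below for illustration (the generator itself is Python; the names here are not from the repository):

#include <array>
#include <string>
#include <vector>

enum class LayoutSensitivity { kAgnostic, kLightly, kHeavily };

// Mirrors the attribute scan in GenerateForwardLayoutAutotune:
// a lightly-sensitive attribute takes precedence over a heavily-sensitive one.
inline LayoutSensitivity ClassifyOpByAttrs(
    const std::vector<std::string>& attr_names) {
  const std::array<const char*, 7> lightly = {"axis", "axes", "dim",  "dims",
                                              "start", "end", "stop"};
  const std::array<const char*, 2> heavily = {"data_format", "data_layout"};
  bool lightly_hit = false;
  bool heavily_hit = false;
  for (const auto& name : attr_names) {
    for (const char* key : lightly) {
      if (name.find(key) != std::string::npos) lightly_hit = true;
    }
    for (const char* key : heavily) {
      if (name.find(key) != std::string::npos) heavily_hit = true;
    }
  }
  if (lightly_hit) return LayoutSensitivity::kLightly;
  if (heavily_hit) return LayoutSensitivity::kHeavily;
  return LayoutSensitivity::kAgnostic;
}

Under this classification, for example, conv2d (with data_format) is heavily sensitive and gets the EagerLayoutAutotune<std::string> specialization, while argmax (with axis and keepdims) is lightly sensitive.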
paddle/fluid/eager/eager_layout_auto_tune.h

@@ -19,20 +19,65 @@
 #include "paddle/fluid/imperative/layout_autotune.h"
 #include "paddle/phi/backends/gpu/gpu_info.h"
 namespace egr {

-// layout_agnostic_ops_
-// For agnostic op like add / relu
-inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
-    const std::string& op_name,
-    const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
-                               kSlotSmallVectorSize>& tensors_vector) {
-  VLOG(3) << " Optimze Layout agnostic op: " << op_name;
-  std::shared_ptr<EagerLayoutTransformer> transposer = nullptr;
-  transposer =
-      std::make_shared<EagerLayoutTransformer>(op_name, tensors_vector);
-  return transposer;
-}
+inline bool NeedTransLayout(
+    const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
+                               kSlotSmallVectorSize>& tensors_vector,
+    const paddle::experimental::DataLayout& layout) {
+  for (size_t i = 0; i < tensors_vector.size(); i++) {
+    for (size_t idx = 0; idx < tensors_vector[0].size(); idx++) {
+      if (layout != tensors_vector[i][idx].layout()) {
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
+inline std::shared_ptr<EagerLayoutTransformer> BaseTransformer(
+    const std::string& op_name,
+    const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
+                               kSlotSmallVectorSize>& tensors_vector) {
+  std::shared_ptr<EagerLayoutTransformer> transposer = nullptr;
+  bool unstart =
+      (paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout() ==
+       paddle::experimental::DataLayout::UNDEFINED);
+  auto first_layout = tensors_vector[0][0].layout();
+  VLOG(3) << "Layout autotune was is start ? " << (!unstart) << op_name
+          << "'s layout is " << first_layout;
+  transposer = std::make_shared<EagerLayoutTransformer>(
+      op_name, tensors_vector, first_layout);
+  return transposer;
+}
+
+// For agnostic op like add, relu, exp
+inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
+    const std::string& op_name,
+    const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
+                               kSlotSmallVectorSize>& tensors_vector) {
+  auto desired_layout =
+      paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
+  auto default_layout =
+      paddle::imperative::LayoutAutoTune::Instance().GetDefaultLayout();
+  auto first_layout = tensors_vector[0][0].layout();
+  if (NeedTransLayout(tensors_vector, first_layout)) {
+    bool need_trans_back = false;
+    for (size_t i = 0; i < tensors_vector.size(); i++) {
+      for (size_t idx = 0; idx < tensors_vector[0].size(); idx++) {
+        if (4 != tensors_vector[i][idx].shape().size()) {
+          need_trans_back = true;
+          VLOG(3) << "Agnostic op " << op_name << " shape is "
+                  << tensors_vector[i][idx].shape().size()
+                  << " and layout is " << tensors_vector[i][idx].layout();
+        }
+      }
+    }
+    auto final_layout = need_trans_back ? default_layout : desired_layout;
+    return std::make_shared<EagerLayoutTransformer>(
+        op_name, tensors_vector, final_layout);
+  }
+  return BaseTransformer(op_name, tensors_vector);
+}

 // For lightly op like reduce
 template <typename T>
 inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
...
@@ -40,16 +85,11 @@ inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
     const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                                kSlotSmallVectorSize>& tensors_vector,
     T* attr) {
+  VLOG(3) << "Lightly op " << op_name << "'s shape is "
+          << tensors_vector[0][0].shape().size() << " and layout is "
+          << tensors_vector[0][0].layout();
   std::shared_ptr<EagerLayoutTransformer> transposer = nullptr;
-  bool unstart =
-      (paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout() ==
-       paddle::experimental::DataLayout::UNDEFINED);
-  if (unstart) {
-    VLOG(3) << "Optimze Layout was not started" << op_name;
-    transposer =
-        std::make_shared<EagerLayoutTransformer>(op_name, tensors_vector);
-    return transposer;
-  }
   transposer =
       std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
   return transposer;
...
@@ -63,33 +103,30 @@ inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
     kSlotSmallVectorSize>& tensors_vector,
     T1* axis,
     T2* keep_dim) {
+  VLOG(3) << "Lightly op " << op_name << "'s shape is "
+          << tensors_vector[0][0].shape().size() << " and layout is "
+          << tensors_vector[0][0].layout();
   return EagerLayoutAutotune<T1>(op_name, tensors_vector, axis);
 }

-// heavily string data_format data_layout
+// heavily string data_format, data_layout
 template <>
 inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
     const std::string& op_name,
     const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                                kSlotSmallVectorSize>& tensors_vector,
     std::string* attr) {
-  VLOG(3) << " Optimze Layout heavily op: " << op_name;
-  auto transposer =
-      std::make_shared<EagerLayoutTransformer>(op_name, tensors_vector);
+  auto first_layout = tensors_vector[0][0].layout();
+  auto transposer = std::make_shared<EagerLayoutTransformer>(
+      op_name, tensors_vector, first_layout);
   if (paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout() ==
       paddle::experimental::DataLayout::UNDEFINED) {
     // Layout autotune only supports model with convolutional layers
-    VLOG(3) << "Optimze Layout was not started" << op_name;
+    VLOG(3) << "Optimze Layout was not started " << op_name;
     if (op_name != "conv2d") {
       return transposer;
     } else {
-#if defined(PADDLE_WITH_CUDA)
-      if (paddle::platform::is_gpu_place(tensors_vector[0][0].place()) &&
-          !phi::backends::gpu::TensorCoreAvailable()) {
-        paddle::imperative::LayoutAutoTune::Instance().DisableLayoutAutoTune();
-        return transposer;
-      }
-#endif
       auto data_type = tensors_vector[0][0].dtype();
       bool is_tune_fp32 =
           (data_type == paddle::experimental::DataType::FLOAT32) &&
...
@@ -97,6 +134,7 @@ inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
       bool is_tune_fp16 =
           (data_type == paddle::experimental::DataType::FLOAT16) &&
           (*attr == "NCHW");
+      VLOG(3) << "Conv2d_dy's dtype " << data_type << " format" << (*attr);
       if (is_tune_fp32) {
         paddle::imperative::LayoutAutoTune::Instance().SetDesiredLayout(
             paddle::experimental::DataLayout::NCHW);
...
@@ -109,26 +147,27 @@ inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
         paddle::imperative::LayoutAutoTune::Instance().SetDefaultLayout(
             paddle::experimental::DataLayout::NCHW);
       } else {
-        paddle::imperative::LayoutAutoTune::Instance().DisableLayoutAutoTune();
+        egr::Controller::Instance().DisableLayoutAutoTune();
         return transposer;
       }
-      VLOG(3) << "Tune the layout from " << attr << " to "
-              << paddle::imperative::LayoutAutoTune::Instance()
-                     .GetDesiredLayout();
+      VLOG(3) << "Tune the layout from " << *attr << " to "
+              << paddle::framework::DataLayoutToString(
+                     paddle::imperative::LayoutAutoTune::Instance()
+                         .GetDesiredLayout());
     }
   }

   if (paddle::imperative::LayoutAutoTune::Instance().IsHeavilyLayoutSensitive(
           op_name)) {
+    VLOG(3) << op_name
+            << "'s LayoutTransformer is EagerHeavilyLayoutSensitiveOpTransformer";
     auto heavily_transposer =
         std::make_shared<EagerHeavilyLayoutSensitiveOpTransformer>(op_name,
                                                                    attr);
     return heavily_transposer;
   }
-  VLOG(3) << op_name << "'s LayoutTransformer is unimplemented. Use default "
-             "LayoutTransformer instead.";
+  VLOG(3) << op_name << "'s LayoutTransformer is unimplemented. Use default.";
   return transposer;
 }
...
@@ -139,24 +178,23 @@ inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
     const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                                kSlotSmallVectorSize>& tensors_vector,
     std::vector<int>* attr) {
+  auto first_layout = tensors_vector[0][0].layout();
   std::shared_ptr<EagerLayoutTransformer> transposer = nullptr;
   if (paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout() ==
       paddle::experimental::DataLayout::UNDEFINED) {
-    VLOG(3) << " Optimze Layout Unstarted : " << op_name;
-    transposer =
-        std::make_shared<EagerLayoutTransformer>(op_name, tensors_vector);
+    VLOG(3) << " Optimze Layout was not started" << op_name;
+    transposer = std::make_shared<EagerLayoutTransformer>(
+        op_name, tensors_vector, first_layout);
     return transposer;
   }
-  VLOG(3) << " Optimze Layout lightly op: " << op_name;
-  if (op_name == "transpose2" &&
-      (tensors_vector[0][0].layout() ==
-       paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout())) {
-    auto trans = std::make_shared<EagerTransposeOpTransformer>(op_name);
-    trans->SetAttr(attr,
-                   tensors_vector[0][0].layout() ==
-                       paddle::experimental::DataLayout::NHWC);
-    return trans;
+  if (op_name == "transpose2") {
+    auto trans = std::make_shared<EagerTransposeOpTransformer>(op_name);
+    if (tensors_vector[0][0].layout() ==
+        paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout()) {
+      trans->SetAttr(attr,
+                     tensors_vector[0][0].layout() ==
+                         paddle::experimental::DataLayout::NHWC);
+      return trans;
+    }
   }
   transposer =
       std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
...
@@ -172,33 +210,32 @@ EagerLayoutAutotune<paddle::experimental::Scalar, bool>(
     kSlotSmallVectorSize>& tensors_vector,
     paddle::experimental::Scalar* axis,
     bool* keep_dim) {
+  auto first_layout = tensors_vector[0][0].layout();
   std::shared_ptr<EagerLayoutTransformer> transposer = nullptr;
   if (paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout() ==
       paddle::experimental::DataLayout::UNDEFINED) {
-    VLOG(3) << " Optimze Layout Unstarted : " << op_name;
-    transposer =
-        std::make_shared<EagerLayoutTransformer>(op_name, tensors_vector);
+    VLOG(3) << " Optimze Layout was not started" << op_name;
+    transposer = std::make_shared<EagerLayoutTransformer>(
+        op_name, tensors_vector, first_layout);
     return transposer;
   }

   auto desired_layout =
       paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
-  if (op_name == "argmax") {
-    std::shared_ptr<EagerArgmaxOpTransformer> argmax_transform = nullptr;
-    argmax_transform = std::make_shared<EagerArgmaxOpTransformer>(op_name);
-    if ((tensors_vector[0][0].layout() == desired_layout) && (*keep_dim)) {
-      argmax_transform->SetAttr(axis,
-                                tensors_vector[0][0].layout() ==
-                                    paddle::experimental::DataLayout::NHWC);
-      return argmax_transform;
-    }
-  }
-  VLOG(3) << " Optimze Layout lightly op: " << op_name;
+  if (op_name == "argmax" &&
+      (tensors_vector[0][0].layout() == desired_layout) && (*keep_dim)) {
+    std::shared_ptr<EagerArgmaxOpTransformer> argmax_transform = nullptr;
+    argmax_transform = std::make_shared<EagerArgmaxOpTransformer>(op_name);
+    argmax_transform->SetAttr(axis,
+                              tensors_vector[0][0].layout() ==
+                                  paddle::experimental::DataLayout::NHWC);
+    return argmax_transform;
+  }
   transposer =
       std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
   return transposer;
 }

-// lightly int flatten
+// lightly for flatten
 template <>
 inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune<int, int>(
     const std::string& op_name,
...
@@ -206,17 +243,17 @@ inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune<int, int>(
     kSlotSmallVectorSize>& tensors_vector,
     int* start_axis,
     int* stop_axis) {
+  auto first_layout = tensors_vector[0][0].layout();
   std::shared_ptr<EagerLayoutTransformer> transposer = nullptr;
-  if (paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout() ==
-      paddle::experimental::DataLayout::UNDEFINED) {
-    VLOG(3) << " Optimze Layout Unstarted : " << op_name;
-    transposer =
-        std::make_shared<EagerLayoutTransformer>(op_name, tensors_vector);
+  auto desired_layout =
+      paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
+  if (desired_layout == paddle::experimental::DataLayout::UNDEFINED) {
+    VLOG(3) << "Optimze Layout was not started" << op_name;
+    transposer = std::make_shared<EagerLayoutTransformer>(
+        op_name, tensors_vector, first_layout);
     return transposer;
   }

   bool no_tranpose =
-      tensors_vector[0][0].layout() ==
-      paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
+      tensors_vector[0][0].layout() == desired_layout;
   bool is_valid = ((*start_axis) == 1 && (*stop_axis) == 3);
   if (op_name == "flatten" || op_name == "flatten_contiguous_range") {
     if (no_tranpose && is_valid) {
...
@@ -226,15 +263,13 @@ inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune<int, int>(
     }
   }
-  VLOG(3) << " Optimze Layout lightly op: " << op_name;
   transposer =
       std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
   return transposer;
 }

 // lightly int Concat
+// lightly T can be int vector<int> vector<int64_t> IntArray
 template <>
-// default int
 inline std::shared_ptr<EagerLayoutTransformer>
 EagerLayoutAutotune<paddle::experimental::Scalar>(
     const std::string& op_name,
...
@@ -243,30 +278,21 @@ EagerLayoutAutotune<paddle::experimental::Scalar>(
     paddle::experimental::Scalar* axis) {
+  auto first_layout = tensors_vector[0][0].layout();
   auto desired_layout =
       paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
   std::shared_ptr<EagerLayoutTransformer> transposer = nullptr;
   if (desired_layout == paddle::experimental::DataLayout::UNDEFINED) {
-    VLOG(3) << " Optimze Layout Unstarted : " << op_name;
-    transposer =
-        std::make_shared<EagerLayoutTransformer>(op_name, tensors_vector);
+    VLOG(3) << " Optimze Layout was not started" << op_name;
+    transposer = std::make_shared<EagerLayoutTransformer>(
+        op_name, tensors_vector, first_layout);
     return transposer;
   }
-  bool need_transpose = false;
-  for (size_t i = 0; i < tensors_vector.size(); i++) {
-    for (size_t idx = 0; idx < tensors_vector[0].size(); idx++) {
-      if (desired_layout != tensors_vector[i][idx].layout()) {
-        need_transpose = true;
-      }
-    }
-  }
-  if (need_transpose) {
-    VLOG(3) << "Concat need transpose to NCHW " << op_name;
+  if (NeedTransLayout(tensors_vector, desired_layout)) {
+    VLOG(3) << op_name << " need transpose to default layout";
     transposer =
         std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
     return transposer;
   } else {
-    VLOG(3) << " Optimze Layout lightly op: " << op_name;
     auto trans = std::make_shared<EagerConcatOpTransformer>(op_name);
     trans->SetAttr(axis, desired_layout);
     return trans;
...
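For layout-agnostic ops, the new EagerLayoutAutotune overload above keeps the tuned (desired) layout only when every input tensor is 4-D; if any input has a different rank, need_trans_back forces a fall back to the model's default layout. A minimal stand-alone sketch of that decision, with layouts reduced to strings purely for illustration:

#include <cstdint>
#include <string>
#include <vector>

// Keep the desired layout only if all inputs are 4-D; otherwise fall back to
// the default layout (the need_trans_back case in the code above).
inline std::string ChooseAgnosticLayout(
    const std::vector<std::vector<int64_t>>& input_shapes,
    const std::string& desired_layout,
    const std::string& default_layout) {
  for (const auto& shape : input_shapes) {
    if (shape.size() != 4) {
      return default_layout;
    }
  }
  return desired_layout;
}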
paddle/fluid/eager/eager_layout_transformer.h

@@ -22,9 +22,9 @@ namespace egr {
 inline paddle::experimental::Tensor EagerTraceTransposeOp(
     const paddle::experimental::DataLayout layout,
     const paddle::experimental::Tensor& in) {
+  VLOG(4) << "AutoTune Transpose from " << in.layout() << " to " << layout
+          << ", tensor's shape is " << in.shape().size();
   if (in.shape().size() != 4) {
-    VLOG(4) << "Shape is " << in.shape().size() << " can't transpose to"
-            << paddle::framework::DataLayoutToString(layout);
     return in;
   }
   std::vector<int> axis;
...
@@ -44,77 +44,75 @@ inline paddle::experimental::Tensor EagerTraceTransposeOp(
 // agnostic op
 class EagerLayoutTransformer {
+  using Layout = paddle::experimental::DataLayout;
+
  public:
-  EagerLayoutTransformer() : op_name_("") {}
-
-  explicit EagerLayoutTransformer(
-      const std::string& op_name,
-      const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
-                                 kSlotSmallVectorSize>& tensors_vector)
-      : op_name_(op_name) {
-    final_layout_ = "UNDEFINED";
-    auto desired_layout =
-        paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
-    for (size_t i = 0; i < tensors_vector.size(); i++) {
-      for (size_t idx = 0; idx < tensors_vector[0].size(); idx++) {
-        if (final_layout_ == "UNDEFINED") {
-          final_layout_ = paddle::framework::DataLayoutToString(
-              tensors_vector[0][0].layout());
-        } else if (tensors_vector[i][idx].layout() == desired_layout) {
-          final_layout_ = paddle::framework::DataLayoutToString(desired_layout);
-          break;
-        }
-      }
-    }
-    VLOG(4) << op_name_ << "final_layout_ is " << final_layout_;
-  }
+  EagerLayoutTransformer() : op_name_(""), final_layout_(Layout::UNDEFINED) {}

   EagerLayoutTransformer(const EagerLayoutTransformer&) = delete;

   EagerLayoutTransformer& operator=(const EagerLayoutTransformer&) = delete;

+  explicit EagerLayoutTransformer(
+      const std::string& op_name,
+      const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
+                                 kSlotSmallVectorSize>& tensors_vector,
+      const Layout final_layout = Layout::UNDEFINED)
+      : op_name_(op_name), final_layout_(final_layout) {
+    VLOG(4) << "Agnostic op : " << op_name_ << " final_layout_ is "
+            << final_layout_;
+  }
+
   virtual ~EagerLayoutTransformer() {}

   virtual paddle::experimental::Tensor TransInTensor(
       const std::string& in_name, const paddle::experimental::Tensor& in) {
-    VLOG(4) << op_name_ << "is is agnostic, final_layout_ is " << final_layout_;
-    return in;
+    if (final_layout_ == Layout::UNDEFINED || final_layout_ == in.layout()) {
+      VLOG(4) << "EagerLayoutTransformer with no trans";
+      return in;
+    } else {  // from NCHW to NHWC
+      VLOG(4) << "EagerLayoutTransformer with trans from " << in.layout()
+              << " to " << final_layout_;
+      auto out_tensor = EagerTraceTransposeOp(final_layout_, in);
+      phi::DenseTensorUtils::GetMutableMeta(
+          static_cast<phi::DenseTensor*>(out_tensor.impl().get()))
+          ->layout = final_layout_;
+      return out_tensor;
+    }
   }

   virtual paddle::optional<paddle::experimental::Tensor> TransInTensor(
       const std::string& in_name,
       const paddle::optional<paddle::experimental::Tensor>& in) {
-    return in;
+    return in ? TransInTensor(in_name, *in) : in;
   }

-  virtual std::vector<paddle::experimental::Tensor> TransInTensor(
+  virtual std::vector<paddle::experimental::Tensor> TransInTensors(
       const std::string& in_name,
       const std::vector<paddle::experimental::Tensor>& in) {
-    VLOG(4) << " TransInTensor";
     return in;
   }

-  virtual paddle::optional<std::vector<paddle::experimental::Tensor>>
-  TransInTensor(
-      const std::string& in_name,
-      const paddle::optional<std::vector<paddle::experimental::Tensor>>& in) {
-    VLOG(4) << " TransInTensor";
-    return in;
-  }
+  virtual paddle::optional<std::vector<paddle::experimental::Tensor>>
+  TransInTensors(
+      const std::string& in_name,
+      const paddle::optional<std::vector<paddle::experimental::Tensor>>& in) {
+    if (in) {
+      return TransInTensors(in_name, *in);
+    }
+    return in;
+  }

-  virtual void SetOutTensorLayout(paddle::experimental::Tensor* out_tensor) {
-    bool use_default = (final_layout_ == "Undefined(AnyLayout)" ||
-                        final_layout_ == ("UNDEFINED"));
-    auto layout = paddle::framework::StringToDataLayout(final_layout_);
-    if (!use_default) {
-      phi::DenseTensorUtils::GetMutableMeta(
-          static_cast<phi::DenseTensor*>(out_tensor->impl().get()))
-          ->layout = layout;
-    }
-    VLOG(4) << op_name_ << "is is agnostic, use_default " << use_default;
-  }
+  virtual void SetOutTensorLayout(
+      paddle::optional<paddle::experimental::Tensor>* out_tensor) {
+    VLOG(4) << "optional out_tensor";
+  }

   virtual void SetOutTensorLayout(
       std::vector<paddle::experimental::Tensor>* out_tensor) {
-    bool use_default = (final_layout_ == "Undefined(AnyLayout)" ||
-                        final_layout_ == ("UNDEFINED"));
+    bool use_default = (final_layout_ == Layout::UNDEFINED);
     if (!use_default) {
       for (size_t i = 0; i < out_tensor->size(); i++) {
         phi::DenseTensorUtils::GetMutableMeta(
...
@@ -126,9 +124,24 @@ class EagerLayoutTransformer {
     VLOG(4) << op_name_ << "is is agnostic, use_default " << use_default;
   }

+  virtual void SetOutTensorLayout(
+      paddle::optional<std::vector<paddle::experimental::Tensor>>* out_tensor) {
+    VLOG(4) << "optional out_tensor";
+  }
+
+  virtual void SetOutTensorLayout(paddle::experimental::Tensor* out_tensor) {
+    bool use_default = final_layout_ == Layout::UNDEFINED;
+    if (!use_default) {
+      phi::DenseTensorUtils::GetMutableMeta(
+          static_cast<phi::DenseTensor*>(out_tensor->impl().get()))
+          ->layout = final_layout_;
+    }
+    VLOG(4) << op_name_ << "is is agnostic, use_default " << use_default;
+  }
+
  protected:
   std::string op_name_;
-  std::string final_layout_;
+  const Layout final_layout_;
 };

 class EagerHeavilyLayoutSensitiveOpTransformer : public EagerLayoutTransformer {
...
@@ -145,21 +158,6 @@ class EagerHeavilyLayoutSensitiveOpTransformer : public EagerLayoutTransformer {
     }
   }

-  virtual paddle::optional<std::vector<paddle::experimental::Tensor>>
-  TransInTensor(
-      const std::string& in_name,
-      const paddle::optional<std::vector<paddle::experimental::Tensor>>& in) {
-    VLOG(4) << op_name_ << "is is heavily";
-    return in;
-  }
-
-  virtual paddle::optional<paddle::experimental::Tensor> TransInTensor(
-      const std::string& in_name,
-      const paddle::optional<paddle::experimental::Tensor>& in) {
-    VLOG(4) << op_name_ << "is is heavily";
-    return in;
-  }
-
   paddle::experimental::Tensor TransInTensor(
       const std::string& in_name, const paddle::experimental::Tensor& in) {
     if (heavily_input_.count(in_name) != 0 && in.layout() != desired_layout_) {
...
@@ -230,7 +228,6 @@ class EagerLightlyLayoutSensitiveOpTransformer : public EagerLayoutTransformer {
         paddle::framework::DataLayoutToString(in.layout());
     auto default_layout =
         paddle::imperative::LayoutAutoTune::Instance().GetDefaultLayout();
     if (final_layout_ == input_layout && in.shape().size() == 4) {
       VLOG(4) << op_name_ << "'s " << in_name << " need transpose from "
               << input_layout << " to default_layout";
...
@@ -245,7 +242,7 @@ class EagerLightlyLayoutSensitiveOpTransformer : public EagerLayoutTransformer {
     return in;
   }

-  virtual std::vector<paddle::experimental::Tensor> TransInTensor(
+  virtual std::vector<paddle::experimental::Tensor> TransInTensors(
       const std::string& in_name,
       const std::vector<paddle::experimental::Tensor>& in) {
     std::vector<paddle::experimental::Tensor> result;
...
@@ -340,22 +337,19 @@ class EagerTransposeOpTransformer
   paddle::experimental::Tensor TransInTensor(
       const std::string& in_name, const paddle::experimental::Tensor& in) {
-    VLOG(4) << "with no transpose: EagerTransposeOpTransformer " << in_name
-            << "'s layout is "
-            << paddle::framework::DataLayoutToString(in.layout());
     return in;
   }

   void SetOutTensorLayout(paddle::experimental::Tensor* out_tensor) {
-    auto desired_layout =
-        paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
-    if (out_tensor->layout() != desired_layout) {
+    auto default_layout =
+        paddle::imperative::LayoutAutoTune::Instance().GetDefaultLayout();
+    if (out_tensor->layout() != default_layout) {
       VLOG(4) << " Set Out_tensor's layout from "
               << paddle::framework::DataLayoutToString(out_tensor->layout())
-              << " to " << final_layout_;
+              << " to " << default_layout;
       phi::DenseTensorUtils::GetMutableMeta(
           static_cast<phi::DenseTensor*>(out_tensor->impl().get()))
-          ->layout = desired_layout;
+          ->layout = default_layout;
     }
   }
...
@@ -385,15 +379,15 @@ class EagerArgmaxOpTransformer
   void SetOutTensorLayout(paddle::experimental::Tensor* out_tensor) {
     VLOG(4) << "EagerArgmaxOpTransformer's out layout is"
             << paddle::framework::DataLayoutToString(out_tensor->layout());
-    auto desired_layout =
-        paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
-    if (desired_layout != out_tensor->layout()) {
+    auto default_layout =
+        paddle::imperative::LayoutAutoTune::Instance().GetDefaultLayout();
+    if (default_layout != out_tensor->layout()) {
       VLOG(4) << "Change layout from "
               << paddle::framework::DataLayoutToString(out_tensor->layout())
-              << " to " << final_layout_;
+              << " to " << default_layout;
       phi::DenseTensorUtils::GetMutableMeta(
           static_cast<phi::DenseTensor*>(out_tensor->impl().get()))
-          ->layout = desired_layout;
+          ->layout = default_layout;
     }
   }
...
@@ -410,11 +404,11 @@ class EagerFlattenOpTransformer
   explicit EagerFlattenOpTransformer(const std::string& op_name)
       : op_name_(op_name) {
     VLOG(3) << "Optimze Layout lightly " << op_name;
-    auto desired_layout =
-        paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
-    std::string desired_layout_str =
-        paddle::framework::DataLayoutToString(desired_layout);
-    final_layout_ = desired_layout_str;
+    auto default_layout =
+        paddle::imperative::LayoutAutoTune::Instance().GetDefaultLayout();
+    std::string default_layout_str =
+        paddle::framework::DataLayoutToString(default_layout);
+    final_layout_ = default_layout_str;
   }

   // transpose from NHWC to NCHW
...
@@ -424,16 +418,17 @@ class EagerFlattenOpTransformer
   void SetOutTensorLayout(paddle::experimental::Tensor* out_tensor) {
-    VLOG(4) << "EagerArgmaxOpTransformer's out layout is"
+    VLOG(4) << "EagerFlattenOpTransformer's out layout is"
             << paddle::framework::DataLayoutToString(out_tensor->layout());
-    auto layout = paddle::framework::StringToDataLayout(final_layout_);
-    if (layout != out_tensor->layout()) {
+    auto desired_layout =
+        paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
+    if (desired_layout != out_tensor->layout()) {
       VLOG(4) << "Change layout from "
               << paddle::framework::DataLayoutToString(out_tensor->layout())
-              << " to " << final_layout_;
+              << " to " << desired_layout;
       phi::DenseTensorUtils::GetMutableMeta(
           static_cast<phi::DenseTensor*>(out_tensor->impl().get()))
-          ->layout = layout;
+          ->layout = desired_layout;
     }
   }
...
@@ -450,11 +445,11 @@ class EagerConcatOpTransformer
   explicit EagerConcatOpTransformer(const std::string& op_name)
       : op_name_(op_name) {
     VLOG(3) << "Optimze Layout lightly " << op_name;
-    auto desired_layout =
-        paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
-    std::string desired_layout_str =
-        paddle::framework::DataLayoutToString(desired_layout);
-    final_layout_ = desired_layout_str;
+    auto default_layout =
+        paddle::imperative::LayoutAutoTune::Instance().GetDefaultLayout();
+    std::string default_layout_str =
+        paddle::framework::DataLayoutToString(default_layout);
+    final_layout_ = default_layout_str;
   }

   void SetAttr(paddle::experimental::Scalar* axis,
...
@@ -467,7 +462,7 @@ class EagerConcatOpTransformer
     (*axis) = static_cast<paddle::experimental::Scalar>(perm[axes]);
   }

-  virtual std::vector<paddle::experimental::Tensor> TransInTensor(
+  virtual std::vector<paddle::experimental::Tensor> TransInTensors(
       const std::string& in_name
,
const
std
::
string
&
in_name
,
const
std
::
vector
<
paddle
::
experimental
::
Tensor
>&
in
)
{
const
std
::
vector
<
paddle
::
experimental
::
Tensor
>&
in
)
{
return
in
;
return
in
;
...
...
paddle/fluid/imperative/layout_autotune.cc
View file @ 16439bb9

@@ -14,23 +14,15 @@
 #include "paddle/fluid/imperative/layout_autotune.h"

-#include "paddle/fluid/eager/api/utils/global_utils.h"
 #include "paddle/fluid/framework/op_info.h"
 #include "paddle/fluid/imperative/layout_transformer.h"
 #include "paddle/phi/backends/gpu/gpu_info.h"
 #include "paddle/phi/core/enforce.h"
 #include "paddle/phi/core/errors.h"

 namespace paddle {
 namespace imperative {

-bool LayoutAutoTune::UseLayoutAutoTune() const {
-#if defined(PADDLE_WITH_CUDA)
-  return use_layout_autotune_;
-#else
-  return false;
-#endif
-}
-
 LayoutAutoTune::LayoutAutoTune() {
   const auto& op_info = paddle::framework::OpInfoMap::Instance().map();
   for (auto it = op_info.begin(); it != op_info.end(); it++) {

@@ -140,6 +132,26 @@ paddle::imperative::NameVarMap<VarType> DealLightlyLayoutSensitive(
   return transposer->Apply(ins, outs, attrs, tracer);
 }

+LayoutAutotuneGuard::LayoutAutotuneGuard(std::shared_ptr<Tracer> tracer,
+                                         bool use_autotune)
+    : tracer_(tracer) {
+  pre_layout_autotune_ = tracer_->UseLayoutAutoTune();
+  if (pre_layout_autotune_ != use_autotune) {
+    tracer_->EnableLayoutAutoTune();
+    if (!use_autotune) {
+      tracer_->DisableLayoutAutoTune();
+    }
+  }
+}
+
+LayoutAutotuneGuard::~LayoutAutotuneGuard() {
+  if (pre_layout_autotune_) {
+    tracer_->EnableLayoutAutoTune();
+  } else {
+    tracer_->DisableLayoutAutoTune();
+  }
+}
+
 template <typename VarType>
 paddle::imperative::NameVarMap<VarType> AutoTuneLayout(
     const std::string& op_type,

@@ -147,7 +159,7 @@ paddle::imperative::NameVarMap<VarType> AutoTuneLayout(
     const paddle::imperative::NameVarMap<VarType>& outs,
     paddle::framework::AttributeMap* attrs,
     const std::shared_ptr<imperative::Tracer>& tracer) {
-  if (!LayoutAutoTune::Instance().UseLayoutAutoTune()) {
+  if (!tracer->UseLayoutAutoTune()) {
     return ins;
   }
   // When layout autotuning is enabled, the tuner will check the desired layout.

@@ -165,7 +177,7 @@ paddle::imperative::NameVarMap<VarType> AutoTuneLayout(
   } else {
 #if defined(PADDLE_WITH_CUDA)
     if (!phi::backends::gpu::TensorCoreAvailable()) {
-      LayoutAutoTune::Instance().DisableLayoutAutoTune();
+      tracer->DisableLayoutAutoTune();
       return ins;
     }
 #endif

@@ -185,7 +197,7 @@ paddle::imperative::NameVarMap<VarType> AutoTuneLayout(
   } else if (is_tune_fp16) {
     LayoutAutoTune::Instance().SetDesiredLayout(DataLayout::NHWC);
   } else {
-    LayoutAutoTune::Instance().DisableLayoutAutoTune();
+    tracer->DisableLayoutAutoTune();
     return ins;
   }
   VLOG(3) << "Tune the layout from "
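The LayoutAutotuneGuard added above is a small RAII helper: it records the tracer's current autotune flag, forces the requested value for the lifetime of the guard, and restores the previous value when the guard goes out of scope. The standalone sketch below mirrors that save/restore behaviour with a plain bool in place of the Tracer state; the class and variable names here are illustrative only and are not part of Paddle.

#include <iostream>

// Stand-in for the thread-local flag kept by Tracer in the diff above.
static bool g_use_layout_autotune = false;

// Minimal guard mirroring LayoutAutotuneGuard: remember the previous value,
// apply the requested value for the current scope, restore it on destruction.
class ScopedAutotuneFlag {
 public:
  explicit ScopedAutotuneFlag(bool use_autotune)
      : previous_(g_use_layout_autotune) {
    g_use_layout_autotune = use_autotune;
  }
  ~ScopedAutotuneFlag() { g_use_layout_autotune = previous_; }

  ScopedAutotuneFlag(const ScopedAutotuneFlag&) = delete;
  ScopedAutotuneFlag& operator=(const ScopedAutotuneFlag&) = delete;

 private:
  bool previous_;
};

int main() {
  std::cout << std::boolalpha;
  std::cout << "before: " << g_use_layout_autotune << "\n";  // false
  {
    ScopedAutotuneFlag guard(/*use_autotune=*/true);
    std::cout << "inside: " << g_use_layout_autotune << "\n";  // true
  }
  std::cout << "after:  " << g_use_layout_autotune << "\n";  // false
  return 0;
}

Unlike this simplified version, the real guard toggles the flag through tracer_->EnableLayoutAutoTune() / DisableLayoutAutoTune(); because the restore happens in a destructor, the previous state comes back even if an op traced inside the scope throws.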
paddle/fluid/imperative/layout_autotune.h
View file @ 16439bb9

@@ -19,8 +19,8 @@
 #include <unordered_set>

 #include "paddle/fluid/framework/type_defs.h"
+#include "paddle/fluid/imperative/tracer.h"
 #include "paddle/phi/common/layout.h"

 namespace paddle {
 namespace imperative {

@@ -35,12 +35,6 @@ class LayoutAutoTune {
     return layout_autoTune;
   }

-  bool UseLayoutAutoTune() const;
-
-  void EnableLayoutAutoTune() { use_layout_autotune_ = true; }
-
-  void DisableLayoutAutoTune() { use_layout_autotune_ = false; }
-
   bool IsHeavilyLayoutSensitive(const std::string& op_type) const {
     return heavily_layout_sensitive_ops_.count(op_type) != 0;
   }

@@ -64,8 +58,6 @@ class LayoutAutoTune {
  private:
   LayoutAutoTune();

-  bool use_layout_autotune_{false};
-
   std::unordered_set<std::string> layout_agnostic_ops_{};

   std::unordered_set<std::string> heavily_layout_sensitive_ops_{"batch_norm"};

@@ -73,11 +65,29 @@ class LayoutAutoTune {
   std::unordered_set<std::string> lightly_layout_sensitive_ops_{
       "instance_norm", "softmax", "transpose", "transpose2", "reshape2"};

+  // Best Layout in this platform
   DataLayout desired_layout_{DataLayout::UNDEFINED};
+  // Default Layout in this model
   DataLayout default_layout_{DataLayout::UNDEFINED};
 };

+// LayoutAutotuneGuard is used for RAII.
+class LayoutAutotuneGuard {
+ public:
+  LayoutAutotuneGuard(std::shared_ptr<Tracer> tracer, bool use_autotune);
+
+  ~LayoutAutotuneGuard();
+
+  // forbid copy and operator=
+  LayoutAutotuneGuard(const LayoutAutotuneGuard& guard) = delete;
+  LayoutAutotuneGuard& operator=(const LayoutAutotuneGuard& guard) = delete;
+
+ private:
+  std::shared_ptr<Tracer> tracer_;
+  bool pre_layout_autotune_;
+};
+
 template <typename VarType>
 paddle::imperative::NameVarMap<VarType> AutoTuneLayout(
     const std::string& op_type,
paddle/fluid/imperative/layout_transformer.h
View file @ 16439bb9

@@ -19,8 +19,24 @@
 #include "paddle/fluid/imperative/var_helper.h"
 #include "paddle/phi/core/enforce.h"
 #include "paddle/phi/core/errors.h"
+#include "paddle/phi/core/tensor_utils.h"

 namespace paddle {
 namespace imperative {

+template <typename VarType>
+void SetOutDataLayout(std::shared_ptr<VarType> var,
+                      const paddle::experimental::DataLayout layout) {
+  if (var != nullptr) {
+    paddle::imperative::SetDataLayout(var, layout);
+    // set out_tensor's layout
+    if (var->MutableVar()->IsInitialized()) {
+      paddle::framework::Variable* tmp_var = var->MutableVar();
+      auto* out = tmp_var->GetMutable<framework::LoDTensor>();
+      phi::DenseTensorUtils::GetMutableMeta(
+          static_cast<framework::LoDTensor*>(out))
+          ->layout = layout;
+    }
+  }
+}
+
 template <typename VarType>
 std::shared_ptr<VarType> TraceTransposeOp(

@@ -118,7 +134,7 @@ class LayoutTransformer {
       auto out_vars = outs.at(name);
       for (auto& var : out_vars) {
         if (var != nullptr) {
-          paddle::imperative::SetDataLayout(var, layout);
+          paddle::imperative::SetOutDataLayout(var, layout);
         }
       }
       not_in_out = false;

@@ -130,7 +146,7 @@ class LayoutTransformer {
     for (auto& pair : outs) {
       for (auto& var : pair.second) {
         if (var != nullptr) {
-          paddle::imperative::SetDataLayout(var, layout);
+          paddle::imperative::SetOutDataLayout(var, layout);
         }
       }
     }
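The new SetOutDataLayout helper above keeps two pieces of state in sync: the layout recorded on the variable wrapper and the layout stored in the meta of an output tensor that is already allocated, which is the case an inplace op hits when it reuses its input buffer. Below is a minimal standalone sketch of that idea, using simplified stand-in types rather than Paddle's VarType / LoDTensor.

#include <iostream>
#include <memory>
#include <string>

// Simplified stand-ins for the variable wrapper and the tensor meta.
struct TensorMeta {
  std::string layout = "NCHW";
};

struct Variable {
  std::string wrapper_layout = "NCHW";  // layout recorded on the wrapper
  std::shared_ptr<TensorMeta> tensor;   // null until the tensor is allocated
};

// Mirrors the helper's behaviour: always update the wrapper, and also update
// the tensor meta when the variable already holds an initialized tensor.
void SetOutDataLayout(Variable* var, const std::string& layout) {
  if (var == nullptr) {
    return;
  }
  var->wrapper_layout = layout;
  if (var->tensor != nullptr) {
    var->tensor->layout = layout;
  }
}

int main() {
  Variable inplace_out;
  inplace_out.tensor = std::make_shared<TensorMeta>();  // already allocated, as for an inplace output

  SetOutDataLayout(&inplace_out, "NHWC");
  std::cout << inplace_out.wrapper_layout << " / " << inplace_out.tensor->layout
            << std::endl;  // prints: NHWC / NHWC
  return 0;
}

Before this change the call sites above only invoked paddle::imperative::SetDataLayout, so the extra meta update is what keeps an already-initialized (inplace) output consistent with the tuned layout.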
paddle/fluid/imperative/tracer.cc
View file @ 16439bb9

@@ -42,6 +42,8 @@ thread_local bool Tracer::enable_program_desc_tracing_ = false;
 thread_local bool Tracer::has_grad_ = true;

+thread_local bool Tracer::use_layout_autotune_ = false;
+
 thread_local AmpLevel Tracer::amp_level_ = AmpLevel::O0;

 thread_local phi::DataType Tracer::amp_dtype_ = phi::DataType::FLOAT32;
paddle/fluid/imperative/tracer.h
View file @ 16439bb9

@@ -28,9 +28,9 @@
 #include "paddle/fluid/imperative/basic_engine.h"
 #include "paddle/fluid/imperative/jit/program_desc_tracer.h"
 #include "paddle/fluid/imperative/layer.h"
-#include "paddle/fluid/imperative/layout_autotune.h"
 #include "paddle/fluid/platform/macros.h"
 #include "paddle/phi/core/compat/arg_map_context.h"

 namespace paddle {
 namespace imperative {

@@ -184,6 +184,20 @@ class Tracer {
     }
   }

+  void DisableLayoutAutoTune() { use_layout_autotune_ = false; }
+
+  void EnableLayoutAutoTune() { use_layout_autotune_ = true; }
+
+  bool UseLayoutAutoTune() {
+#if defined(PADDLE_WITH_CUDA)
+    if (phi::backends::gpu::TensorCoreAvailable()) {
+      return use_layout_autotune_;
+    }
+#endif
+    use_layout_autotune_ = false;
+    return false;
+  }
+
   phi::KernelSignature GetExpectedKernelSignature(
       const std::string& type,
       const NameTensorMap& ins,

@@ -199,8 +213,8 @@ class Tracer {
   std::unique_ptr<UniqueNameGenerator> generator_;
   platform::Place expected_place_;
   GarbageCollectorMap gcs_;
   static thread_local bool enable_program_desc_tracing_;
+  static thread_local bool use_layout_autotune_;
   static thread_local bool has_grad_;
   static thread_local AmpLevel amp_level_;
   static thread_local phi::DataType amp_dtype_;
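Note that use_layout_autotune_ is declared static thread_local, so enabling autotune through the tracer on one thread does not change the flag seen by other threads. The short standalone snippet below (plain C++ with a thread_local bool, not Paddle code) illustrates that property.

#include <iostream>
#include <thread>

// Stand-in for Tracer::use_layout_autotune_, which is thread_local in the diff above.
thread_local bool use_layout_autotune = false;

int main() {
  use_layout_autotune = true;  // enabled on the main thread only

  std::thread worker([] {
    // The worker thread sees its own copy, still at the default value.
    std::cout << "worker sees: " << std::boolalpha << use_layout_autotune
              << std::endl;  // false
  });
  worker.join();

  std::cout << "main sees: " << std::boolalpha << use_layout_autotune
            << std::endl;  // true
  return 0;
}

Tracer::UseLayoutAutoTune() additionally clears the flag and returns false when tensor cores are not available, mirroring the TensorCoreAvailable check that AutoTuneLayout performs under PADDLE_WITH_CUDA.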
paddle/fluid/pybind/pybind.cc
View file @ 16439bb9

@@ -2503,19 +2503,14 @@ All parameter, weight, gradient are variables in Paddle.
         return res;
       });

-  m.def("enable_layout_autotune", [] {
-    return paddle::imperative::LayoutAutoTune::Instance()
-        .EnableLayoutAutoTune();
-  });
+  m.def("enable_layout_autotune",
+        [] { return egr::Controller::Instance().EnableLayoutAutoTune(); });

-  m.def("disable_layout_autotune", [] {
-    return paddle::imperative::LayoutAutoTune::Instance()
-        .DisableLayoutAutoTune();
-  });
+  m.def("disable_layout_autotune",
+        [] { return egr::Controller::Instance().DisableLayoutAutoTune(); });

-  m.def("use_layout_autotune", [] {
-    return paddle::imperative::LayoutAutoTune::Instance().UseLayoutAutoTune();
-  });
+  m.def("use_layout_autotune",
+        [] { return egr::Controller::Instance().UseLayoutAutoTune(); });

   BindFleetWrapper(&m);
   BindIO(&m);
paddle/phi/api/lib/data_transform.cc
View file @ 16439bb9

@@ -52,9 +52,9 @@ inline bool NeedTransformPlace(const paddle::platform::Place& input,
   return ret;
 }

-inline bool NeedTransformLayout(const paddle::platform::Place& place,
-                                const DataLayout& input,
+inline bool NeedTransformLayout(const DataLayout& input,
                                 const DataLayout& target,
+                                const paddle::platform::Place& place,
                                 const TransformFlag& transform_flag) {
   bool ret = transform_flag.need_trans_layout() &&
              (input != DataLayout::ALL_LAYOUT &&

@@ -202,9 +202,9 @@ phi::DenseTensor TransformData(phi::DenseTensor* tensor,
   bool trans_layout = false;
   bool trans_dtype = false;

-  if (NeedTransformLayout(tensor->place(),
-                          tensor->layout(),
+  if (NeedTransformLayout(tensor->layout(),
                           target_args_def.layout,
+                          tensor->place(),
                           transform_flag)) {
     out = TransDataLayout(out, target_args_def.layout);
     trans_layout = true;

@@ -240,9 +240,9 @@ std::shared_ptr<phi::DenseTensor> PrepareData(
           dense_tensor.place(), target_args_def.backend, transform_flag) &&
       !NeedTransformDataType(
           dense_tensor.dtype(), target_args_def.dtype, transform_flag) &&
-      !NeedTransformLayout(dense_tensor.place(),
-                           dense_tensor.layout(),
+      !NeedTransformLayout(dense_tensor.layout(),
                            target_args_def.layout,
+                           dense_tensor.place(),
                            transform_flag))) {
     return std::static_pointer_cast<phi::DenseTensor>(tensor_in);
   }

@@ -277,9 +277,9 @@ std::unique_ptr<std::vector<phi::DenseTensor>> PrepareData(
           tensor_in->place(), target_args_def.backend, transform_flag) &&
       !NeedTransformDataType(
           tensor_in->dtype(), target_args_def.dtype, transform_flag) &&
-      !NeedTransformLayout(tensor_in->place(),
-                           tensor_in->layout(),
+      !NeedTransformLayout(tensor_in->layout(),
                            target_args_def.layout,
+                           tensor_in->place(),
                            transform_flag))) {
     pt_tensors->emplace_back(
         *std::dynamic_pointer_cast<phi::DenseTensor>(tensor_in));
python/paddle/fluid/tests/unittests/test_layout_autotune.py
View file @ 16439bb9

@@ -46,6 +46,13 @@ class SimpleNet(paddle.nn.Layer):

 class LayoutAutoTune(unittest.TestCase):

+    def test_config(self):
+        paddle.fluid.core.enable_layout_autotune()
+        if self.use_autoune():
+            self.assertEqual(paddle.fluid.core.use_layout_autotune(), True)
+            paddle.fluid.core.disable_layout_autotune()
+        self.assertEqual(paddle.fluid.core.use_layout_autotune(), False)
+
     def setUp(self):
         self.use_autoune()
python/paddle/nn/functional/conv.py
View file @ 16439bb9

@@ -130,15 +130,13 @@ def _conv_nd(x,
     if bias is not None:
         channel_dim = channel_dim + len(
             x.shape) if channel_dim < 0 else channel_dim
-        if pre_bias.layout == "NHWC":
-            channel_dim = 3  # last dim
         if isinstance(x, tuple):
             x = x[0]
         if isinstance(bias, tuple):
             bias = bias[0]
         if len(bias.shape) < len(x.shape):
             tmp_bias = _C_ops.reshape(
-                bias, bias.shape +
+                bias, [1 for i in range(channel_dim)] + bias.shape +
                 [1 for i in range(len(x.shape) - channel_dim - 1)])
             return _C_ops.add(pre_bias, tmp_bias)
         else: