Crayon鑫 / Paddle (forked from PaddlePaddle / Paddle)
Commit 3b895425 (unverified)
Authored by zhangbo9674 on Mar 26, 2022; committed by GitHub on Mar 26, 2022.
[AMP] add amp for final_status_dygraph (#40945)
* add amp for final status
* solve compile error
Parent: ea9684f1
Showing 5 changed files with 274 additions and 100 deletions (+274 / -100).
paddle/fluid/eager/amp_auto_cast.h (+97, -0)
paddle/fluid/eager/amp_utils.h (+21, -98)
paddle/fluid/eager/auto_code_generator/eager_generator.cc (+1, -0)
paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py (+65, -2)
paddle/fluid/eager/eager_amp_auto_cast.h (+90, -0)
paddle/fluid/eager/amp_auto_cast.h (new file, mode 100644)
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/eager/api/generated/fluid_generated/dygraph_forward_api.h"
#include "paddle/fluid/framework/convert_utils.h"
namespace egr {

static inline bool NeedCast(const paddle::experimental::Tensor& tensor,
                            const paddle::experimental::DataType& dst_dtype) {
  auto place = tensor.inner_place();
  auto data_type = tensor.dtype();
  if (paddle::platform::is_gpu_place(place) ||
      paddle::platform::is_cuda_pinned_place(place) ||
      paddle::platform::is_xpu_place(place) ||
      paddle::platform::is_mlu_place(place) ||
      paddle::platform::is_npu_place(place) ||
      paddle::platform::is_npu_pinned_place(place)) {
    // CudaPinndePlace is added for varbase created by dataloader
    if ((data_type == paddle::experimental::DataType::FLOAT32 ||
         data_type == paddle::experimental::DataType::FLOAT16 ||
         data_type == paddle::experimental::DataType::BFLOAT16) &&
        (data_type != dst_dtype)) {
      return true;
    }
  }
  return false;
}

inline std::vector<paddle::experimental::Tensor> AmpAutoCasts(
    const std::string& inputs_name,
    const std::vector<paddle::experimental::Tensor>& inputs,
    const paddle::experimental::DataType& dst_dtype, std::string op_name) {
  VLOG(6) << "AMP AmpAutoCasts:"
          << " inputs(" << inputs_name << ") dst_dtype("
          << paddle::framework::DataType2String(dst_dtype) << ").";
  std::vector<paddle::experimental::Tensor> inputs_casted;
  for (auto& input : inputs) {
    if (NeedCast(input, dst_dtype)) {
      paddle::framework::AttributeMap cast_attrs = {
          {"in_dtype", paddle::framework::TransToProtoVarType(input.dtype())},
          {"out_dtype", paddle::framework::TransToProtoVarType(dst_dtype)}};
      inputs_casted.emplace_back(
          std::move(cast_dygraph_function(input, cast_attrs)));
    } else {
      inputs_casted.emplace_back(input);
    }
  }
  return inputs_casted;
}

inline paddle::experimental::Tensor AmpAutoCast(
    const std::string& input_name, const paddle::experimental::Tensor& input,
    const paddle::experimental::DataType& dst_dtype, std::string op_name) {
  VLOG(6) << "AMP AmpAutoCasts:"
          << " input(" << input_name << ") dst_dtype("
          << paddle::framework::DataType2String(dst_dtype) << ").";
  if (dst_dtype == paddle::experimental::DataType::FLOAT16) {
    if (op_name == "run_program") {
      return input;
    }
    if ((op_name == "batch_norm" || op_name == "layer_norm" ||
         op_name == "sync_batch_norm") &&
        input_name != "X") {
      return input;
    }
    if ((op_name == "fused_attention" || op_name == "fused_feedforward")) {
      if (input_name == "LnScale" || input_name == "LnBias" ||
          input_name == "Ln2Scale" || input_name == "Ln2Bias" ||
          input_name == "Ln1Scale" || input_name == "Ln1Bias") {
        return input;
      }
    }
  }
  if (NeedCast(input, dst_dtype)) {
    paddle::framework::AttributeMap cast_attrs = {
        {"in_dtype", paddle::framework::TransToProtoVarType(input.dtype())},
        {"out_dtype", paddle::framework::TransToProtoVarType(dst_dtype)}};
    return cast_dygraph_function(input, cast_attrs);
  }
  return input;
}

}  // namespace egr
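
For orientation, the snippet below sketches the call pattern these helpers are designed for on the intermediate (fluid) dygraph path. It is an illustration rather than part of the commit: the tensors x and ins and the op name "my_op" are placeholder assumptions; only the egr::GetAmpDestDtype, egr::AmpAutoCast and egr::AmpAutoCasts signatures come from the files in this change.

// Hypothetical caller (not part of this commit): cast the inputs of one op to
// the AMP destination dtype before invoking it. Assumes a
// paddle::experimental::Tensor x and a std::vector<paddle::experimental::Tensor> ins.
std::vector<std::vector<paddle::experimental::Tensor>> amp_tensors_vector = {{x}, ins};
auto amp_dst_dtype = egr::GetAmpDestDtype("my_op", amp_tensors_vector);
auto new_x = egr::AmpAutoCast("X", x, amp_dst_dtype, "my_op");
auto new_ins = egr::AmpAutoCasts("Ins", ins, amp_dst_dtype, "my_op");
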
paddle/fluid/eager/amp_utils.h

@@ -13,30 +13,27 @@
 // limitations under the License.

 #pragma once

 #include <map>
 #include <string>
 #include "paddle/fluid/eager/api/generated/fluid_generated/dygraph_forward_api.h"
 #include "paddle/fluid/eager/api/utils/global_utils.h"
 #include "paddle/fluid/framework/convert_utils.h"
 #include "paddle/fluid/imperative/amp_auto_cast.h"

 namespace egr {

 static inline paddle::experimental::DataType GetPromoteType(
-    const std::string& api_name,
+    const std::string& op_name,
     const std::vector<std::vector<paddle::experimental::Tensor>>&
         amp_tensors_vector,
     const paddle::experimental::DataType& amp_dtype) {
   auto dst_type = amp_dtype;
   if (egr::Controller::Instance().GetCurrentTracer()->GetAmpDtype() ==
       "float16") {
-    if (api_name == "batch_norm" || api_name == "layer_norm" ||
-        api_name == "sync_batch_norm") {
+    if (op_name == "batch_norm" || op_name == "layer_norm" ||
+        op_name == "sync_batch_norm") {
       if (amp_tensors_vector[0][0].dtype() ==
           paddle::experimental::DataType::FLOAT32) {
         dst_type = paddle::experimental::DataType::FLOAT32;
       }
-    } else if (api_name == "fused_attention") {
+    } else if (op_name == "fused_attention") {
       for (size_t i = 0; i < amp_tensors_vector.size(); i++) {
         if (i != 3 || i != 4 || i != 9 || i != 10) {
           if (amp_tensors_vector[i][0].dtype() ==
 ...
@@ -46,7 +43,7 @@ static inline paddle::experimental::DataType GetPromoteType(
           }
         }
       }
-    } else if (api_name == "fused_feedforward") {
+    } else if (op_name == "fused_feedforward") {
       for (size_t i = 0; i < amp_tensors_vector.size(); i++) {
         if (i != 7 || i != 8 || i != 9 || i != 10) {
           if (amp_tensors_vector[i][0].dtype() ==
 ...
@@ -78,7 +75,7 @@ static inline paddle::experimental::DataType GetPromoteType(
   }

   // NOTE(juncai): moving_average_abs_max_scale only consider the dtype of
   // input(X)
-  if (api_name == "moving_average_abs_max_scale") {
+  if (op_name == "moving_average_abs_max_scale") {
     if (amp_tensors_vector[0][0].dtype() ==
         paddle::experimental::DataType::FLOAT16) {
       dst_type = paddle::experimental::DataType::FLOAT16;
 ...
@@ -87,33 +84,33 @@ static inline paddle::experimental::DataType GetPromoteType(
   return dst_type;
 }

-paddle::experimental::DataType GetAmpDestDtype(
-    const std::string& api_name,
+inline paddle::experimental::DataType GetAmpDestDtype(
+    const std::string& op_name,
     const std::vector<std::vector<paddle::experimental::Tensor>>&
         amp_tensors_vector) {
   auto amp_dtype =
       egr::Controller::Instance().GetCurrentTracer()->GetAmpDtype();
   auto amp_level = egr::Controller::Instance().GetAMPLevel();
   VLOG(6) << "AMP GetAmpDestDtype:"
-          << " op(" << api_name << ") amp_dtype(" << amp_dtype << ") amp_level("
+          << " op(" << op_name << ") amp_dtype(" << amp_dtype << ") amp_level("
           << static_cast<int>(amp_level) << ").";
   if (amp_dtype == "float16") {
     if (amp_level == paddle::imperative::AmpLevel::O1) {
       if (paddle::imperative::AmpOperators::Instance()
               .GetMutableAllowOps()
-              ->count(api_name)) {
+              ->count(op_name)) {
         return paddle::experimental::DataType::FLOAT16;
       } else if (paddle::imperative::AmpOperators::Instance()
                      .GetMutableBlockOps()
-                     ->count(api_name)) {
+                     ->count(op_name)) {
         return paddle::experimental::DataType::FLOAT32;
       } else {
-        auto dst_type = GetPromoteType(api_name, amp_tensors_vector,
+        auto dst_type = GetPromoteType(op_name, amp_tensors_vector,
                                        paddle::experimental::DataType::FLOAT16);
         if (dst_type == paddle::experimental::DataType::FLOAT16 &&
             paddle::imperative::AmpOperators::Instance()
                 .GetMutableUnsupportedFp16Ops()
-                ->count(api_name)) {
+                ->count(op_name)) {
           dst_type = paddle::experimental::DataType::FLOAT32;
         }
         return dst_type;
 ...
@@ -122,10 +119,10 @@ paddle::experimental::DataType GetAmpDestDtype(
       auto dst_type = paddle::experimental::DataType::FLOAT16;
       if (paddle::imperative::AmpOperators::Instance()
               .GetMutableUnsupportedFp16Ops()
-              ->count(api_name) ||
+              ->count(op_name) ||
           paddle::imperative::AmpOperators::Instance()
               .GetMutableBlockOps()
-              ->count(api_name)) {
+              ->count(op_name)) {
         dst_type = paddle::experimental::DataType::FLOAT32;
       }
       return dst_type;
 ...
@@ -134,20 +131,20 @@ paddle::experimental::DataType GetAmpDestDtype(
     if (amp_level == paddle::imperative::AmpLevel::O1) {
       if (paddle::imperative::AmpOperators::Instance()
               .GetMutableAllowOps()
-              ->count(api_name)) {
+              ->count(op_name)) {
         return paddle::experimental::DataType::BFLOAT16;
       } else if (paddle::imperative::AmpOperators::Instance()
                      .GetMutableBlockOps()
-                     ->count(api_name)) {
+                     ->count(op_name)) {
         return paddle::experimental::DataType::FLOAT32;
       } else {
         auto dst_type =
-            GetPromoteType(api_name, amp_tensors_vector,
+            GetPromoteType(op_name, amp_tensors_vector,
                            paddle::experimental::DataType::BFLOAT16);
         if (dst_type == paddle::experimental::DataType::BFLOAT16 &&
             paddle::imperative::AmpOperators::Instance()
                 .GetMutableUnsupportedBf16Ops()
-                ->count(api_name)) {
+                ->count(op_name)) {
           dst_type = paddle::experimental::DataType::FLOAT32;
         }
         return dst_type;
 ...
@@ -156,10 +153,10 @@ paddle::experimental::DataType GetAmpDestDtype(
       auto dst_type = paddle::experimental::DataType::BFLOAT16;
       if (paddle::imperative::AmpOperators::Instance()
               .GetMutableUnsupportedBf16Ops()
-              ->count(api_name) ||
+              ->count(op_name) ||
           paddle::imperative::AmpOperators::Instance()
               .GetMutableBlockOps()
-              ->count(api_name)) {
+              ->count(op_name)) {
         dst_type = paddle::experimental::DataType::FLOAT32;
       }
       return dst_type;
 ...
@@ -168,78 +165,4 @@ paddle::experimental::DataType GetAmpDestDtype(
   return paddle::experimental::DataType::FLOAT32;
 }

-static inline bool NeedCast(const paddle::experimental::Tensor& tensor,
-                            const paddle::experimental::DataType& dst_dtype) {
-  auto place = tensor.inner_place();
-  auto data_type = tensor.dtype();
-  if (paddle::platform::is_gpu_place(place) ||
-      paddle::platform::is_cuda_pinned_place(place) ||
-      paddle::platform::is_xpu_place(place) ||
-      paddle::platform::is_mlu_place(place) ||
-      paddle::platform::is_npu_place(place) ||
-      paddle::platform::is_npu_pinned_place(place)) {
-    // CudaPinndePlace is added for varbase created by dataloader
-    if ((data_type == paddle::experimental::DataType::FLOAT32 ||
-         data_type == paddle::experimental::DataType::FLOAT16 ||
-         data_type == paddle::experimental::DataType::BFLOAT16) &&
-        (data_type != dst_dtype)) {
-      return true;
-    }
-  }
-  return false;
-}
-
-std::vector<paddle::experimental::Tensor> AmpAutoCasts(
-    const std::string& inputs_name,
-    const std::vector<paddle::experimental::Tensor>& inputs,
-    const paddle::experimental::DataType& dst_dtype, std::string api_name) {
-  VLOG(6) << "AMP AmpAutoCasts:"
-          << " inputs(" << inputs_name << ") dst_dtype("
-          << paddle::framework::DataType2String(dst_dtype) << ").";
-  std::vector<paddle::experimental::Tensor> inputs_casted;
-  for (auto& input : inputs) {
-    if (NeedCast(input, dst_dtype)) {
-      paddle::framework::AttributeMap cast_attrs = {
-          {"in_dtype", paddle::framework::TransToProtoVarType(input.dtype())},
-          {"out_dtype", paddle::framework::TransToProtoVarType(dst_dtype)}};
-      inputs_casted.emplace_back(
-          std::move(cast_dygraph_function(input, cast_attrs)));
-    } else {
-      inputs_casted.emplace_back(input);
-    }
-  }
-  return inputs_casted;
-}
-
-paddle::experimental::Tensor AmpAutoCast(
-    const std::string& input_name, const paddle::experimental::Tensor& input,
-    const paddle::experimental::DataType& dst_dtype, std::string api_name) {
-  VLOG(6) << "AMP AmpAutoCasts:"
-          << " input(" << input_name << ") dst_dtype("
-          << paddle::framework::DataType2String(dst_dtype) << ").";
-  if (dst_dtype == paddle::experimental::DataType::FLOAT16) {
-    if (api_name == "run_program") {
-      return input;
-    }
-    if ((api_name == "batch_norm" || api_name == "layer_norm" ||
-         api_name == "sync_batch_norm") &&
-        input_name != "X") {
-      return input;
-    }
-    if ((api_name == "fused_attention" || api_name == "fused_feedforward")) {
-      if (input_name == "LnScale" || input_name == "LnBias" ||
-          input_name == "Ln2Scale" || input_name == "Ln2Bias" ||
-          input_name == "Ln1Scale" || input_name == "Ln1Bias") {
-        return input;
-      }
-    }
-  }
-  if (NeedCast(input, dst_dtype)) {
-    paddle::framework::AttributeMap cast_attrs = {
-        {"in_dtype", paddle::framework::TransToProtoVarType(input.dtype())},
-        {"out_dtype", paddle::framework::TransToProtoVarType(dst_dtype)}};
-    return cast_dygraph_function(input, cast_attrs);
-  }
-  return input;
-}
-
 }  // namespace egr
paddle/fluid/eager/auto_code_generator/eager_generator.cc

@@ -2587,6 +2587,7 @@ static void GenerateForwardDygraphFile(const std::string& forward_cc_path,
"
\"
paddle/fluid/eager/api/generated/fluid_generated/nodes/nodes.h
\"\n
"
"#include
\"
paddle/fluid/eager/api/utils/global_utils.h
\"\n
"
"#include
\"
paddle/fluid/eager/amp_utils.h
\"\n
"
"#include
\"
paddle/fluid/eager/amp_auto_cast.h
\"\n
"
"#include
\"
paddle/fluid/platform/profiler/event_tracing.h
\"\n\n
"
;
std
::
string
forward_cc_include_str
=
paddle
::
string
::
Sprintf
(
FORWARD_INCLUDE_TEMPLATE
);
...
...
paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py

@@ -163,7 +163,7 @@ FORWARD_FUNCTION_TEMPLATE = \
"""
{} {}({}) {{
{}
{}
{}
// Returns
...
...
@@ -249,6 +249,8 @@ FORWARD_CC_FILE_TEMPLATE = \
 #include "paddle/phi/api/include/sparse_api.h"
 #include "paddle/fluid/eager/api/utils/global_utils.h"
 #include "paddle/fluid/platform/profiler/event_tracing.h"
+#include "paddle/fluid/eager/amp_utils.h"
+#include "paddle/fluid/eager/eager_amp_auto_cast.h"

 {}
 {}
 ...
@@ -304,6 +306,23 @@ BUMP_INPLACE_VERSION_TEMPLATE = \
"""
AMP_LOGIC_TEMPLATE
=
\
"""
if (egr::Controller::Instance().GetAMPLevel() != paddle::imperative::AmpLevel::O0) {{
VLOG(5) << "Check and Prepare For AMP";
{}
std::vector<std::vector<paddle::experimental::Tensor>> amp_tensors_vector = {};
{}
{}
{}
{{
paddle::imperative::AutoCastGuard guard(egr::Controller::Instance().GetCurrentTracer(), paddle::imperative::AmpLevel::O0);
{}
}}
}}
"""
#######################
## Generator Helpers ##
#######################
...
@@ -769,26 +788,51 @@ class DygraphSingleFunctionGenerator(FunctionGeneratorBase):
         inputs_args_definition_list = ["" for i in range(num_inputs)]
         inputs_args_declaration_list = ["" for i in range(num_inputs)]
         inputs_call_list = ["" for i in range(num_inputs)]
+        amp_inputs_call_list = ["" for i in range(num_inputs)]
+        amp_tensors_vector_list = []
+        amp_tensors_vector_optional_list = []
+        amp_autocast_list = []
+        amp_autocast_optional_list = []

         for name, (ttype, pos) in forward_inputs_position_map.items():
             inputs_call_list[pos] = f"{name}"
+            amp_inputs_call_list[pos] = f"NEW_{name}"
             is_optional = (name in optional_inputs)
             if IsPlainTensorType(ttype):
                 if is_optional:
                     arg_str = f"const paddle::optional<const paddle::experimental::Tensor&> {name}"
+                    amp_tensors_vector_optional_list.append(
+                        f"if ({name}.is_initialized()) amp_tensors_vector.push_back({name}.get());\n"
+                    )
+                    amp_autocast_optional_list.append(
+                        f"auto NEW_{name} = {name}.is_initialized() ? egr::EagerAmpAutoCast(\"{name}\", {name}, amp_dst_dtype, op_name) : {name};\n"
+                    )
                 else:
                     if inplace_map and name in inplace_map.keys():
                         arg_str = f"paddle::experimental::Tensor& {name}"
+                        amp_tensors_vector_list.append(f"{{{name}}}")
+                        amp_autocast_list.append(
+                            f"auto NEW_{name} = egr::EagerAmpAutoCast(\"{name}\", {name}, amp_dst_dtype, op_name);\n"
+                        )
                     else:
                         arg_str = f"const paddle::experimental::Tensor& {name}"
+                        amp_tensors_vector_list.append(f"{{{name}}}")
+                        amp_autocast_list.append(
+                            f"auto NEW_{name} = egr::EagerAmpAutoCast(\"{name}\", {name}, amp_dst_dtype, op_name);\n"
+                        )
             else:
                 assert IsVectorTensorType(ttype)
                 arg_str = f"const std::vector<paddle::experimental::Tensor>& {name}"
+                amp_tensors_vector_list.append(f"{name}")
+                amp_autocast_list.append(
+                    f"auto NEW_{name} = egr::EagerAmpAutoCasts(\"{name}\", {name}, amp_dst_dtype, op_name);\n"
+                )

             inputs_args_definition_list[pos] = arg_str
             inputs_args_declaration_list[pos] = arg_str

         for name, atype, default_val, pos in forward_attrs_list:
             inputs_call_list[pos] = name
+            amp_inputs_call_list[pos] = name
             if default_val is not None:
                 inputs_args_declaration_list[pos] = f"{atype} {name} = {default_val}"
 ...
@@ -843,9 +887,28 @@ class DygraphSingleFunctionGenerator(FunctionGeneratorBase):
         dygraph_event_str = f"paddle::platform::RecordEvent dygraph_entrance_record_event(\"{forward_api_name} dygraph\", paddle::platform::TracerEventType::Operator, 1);"
         forward_function_name = GetDygraphForwardFunctionName(forward_api_name)

+        # Forward amp logic
+        kernel_trans2_op_name_str = f"auto op_name = phi::TransToFluidOpName(\"{forward_api_name}\");"
+        amp_tensors_vector_list_str = "{ " + ",".join(amp_tensors_vector_list) + " }"
+        amp_tensors_vector_optional_list_str = "".join(amp_tensors_vector_optional_list)
+        amp_get_dst_dtype_str = f"auto amp_dst_dtype = egr::GetAmpDestDtype(op_name, amp_tensors_vector);\n"
+        amp_autocast_list_str = " ".join(amp_autocast_list) + " ".join(amp_autocast_optional_list)
+        amp_inputs_call_args_str = ", ".join(amp_inputs_call_list)
+        amp_call_str = f"return {forward_function_name}({amp_inputs_call_args_str});"
+        if is_inplaced or (forward_api_name == "cast"):
+            amp_logic_str = ""
+        else:
+            amp_logic_str = AMP_LOGIC_TEMPLATE.format(
+                kernel_trans2_op_name_str, amp_tensors_vector_list_str,
+                amp_tensors_vector_optional_list_str, amp_get_dst_dtype_str,
+                amp_autocast_list_str, amp_call_str)

         self.forward_definition_str += FORWARD_FUNCTION_TEMPLATE.format(
             returns_type_str, forward_function_name, inputs_args_definition_str,
-            dygraph_event_str, node_creation_str, returns_str)
+            dygraph_event_str, amp_logic_str, node_creation_str, returns_str)
         self.forward_declaration_str += f"{returns_type_str} {forward_function_name}({inputs_args_declaration_str});\n"

         logging.info(
 ...
paddle/fluid/eager/eager_amp_auto_cast.h (new file, mode 100644)
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/eager/api/generated/eager_generated/forwards/dygraph_functions.h"
namespace egr {

static inline bool NeedCast(const paddle::experimental::Tensor& tensor,
                            const paddle::experimental::DataType& dst_dtype) {
  auto place = tensor.inner_place();
  auto data_type = tensor.dtype();
  if (paddle::platform::is_gpu_place(place) ||
      paddle::platform::is_cuda_pinned_place(place) ||
      paddle::platform::is_xpu_place(place) ||
      paddle::platform::is_mlu_place(place) ||
      paddle::platform::is_npu_place(place) ||
      paddle::platform::is_npu_pinned_place(place)) {
    // CudaPinndePlace is added for varbase created by dataloader
    if ((data_type == paddle::experimental::DataType::FLOAT32 ||
         data_type == paddle::experimental::DataType::FLOAT16 ||
         data_type == paddle::experimental::DataType::BFLOAT16) &&
        (data_type != dst_dtype)) {
      return true;
    }
  }
  return false;
}

inline std::vector<paddle::experimental::Tensor> EagerAmpAutoCasts(
    const std::string& inputs_name,
    const std::vector<paddle::experimental::Tensor>& inputs,
    const paddle::experimental::DataType& dst_dtype, std::string op_name) {
  VLOG(6) << "AMP AmpAutoCasts:"
          << " inputs(" << inputs_name << ") dst_dtype("
          << paddle::framework::DataType2String(dst_dtype) << ").";
  std::vector<paddle::experimental::Tensor> inputs_casted;
  for (auto& input : inputs) {
    if (NeedCast(input, dst_dtype)) {
      inputs_casted.emplace_back(
          std::move(cast_final_state_dygraph_function(input, dst_dtype)));
    } else {
      inputs_casted.emplace_back(input);
    }
  }
  return inputs_casted;
}

inline paddle::experimental::Tensor EagerAmpAutoCast(
    const std::string& input_name, const paddle::experimental::Tensor& input,
    const paddle::experimental::DataType& dst_dtype, std::string op_name) {
  VLOG(6) << "AMP AmpAutoCasts:"
          << " input(" << input_name << ") dst_dtype("
          << paddle::framework::DataType2String(dst_dtype) << ").";
  if (dst_dtype == paddle::experimental::DataType::FLOAT16) {
    if (op_name == "run_program") {
      return input;
    }
    if ((op_name == "batch_norm" || op_name == "layer_norm" ||
         op_name == "sync_batch_norm") &&
        input_name != "x") {
      return input;
    }
    if ((op_name == "fused_attention" || op_name == "fused_feedforward")) {
      if (input_name == "LnScale" || input_name == "LnBias" ||
          input_name == "Ln2Scale" || input_name == "Ln2Bias" ||
          input_name == "Ln1Scale" || input_name == "Ln1Bias") {
        return input;
      }
    }
  }
  if (NeedCast(input, dst_dtype)) {
    return cast_final_state_dygraph_function(input, dst_dtype);
  }
  return input;
}

}  // namespace egr
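
The main behavioral difference between the two new headers is how the cast itself is issued: amp_auto_cast.h goes through the fluid cast op and its attribute map, while eager_amp_auto_cast.h hands the phi DataType straight to the final-state cast function. The fragment below simply juxtaposes the two call forms already shown above; input and dst_dtype stand for the corresponding function arguments.

// Intermediate dygraph path (amp_auto_cast.h): cast via the fluid cast op's attributes.
paddle::framework::AttributeMap cast_attrs = {
    {"in_dtype", paddle::framework::TransToProtoVarType(input.dtype())},
    {"out_dtype", paddle::framework::TransToProtoVarType(dst_dtype)}};
auto casted_fluid = cast_dygraph_function(input, cast_attrs);

// Final-state path (eager_amp_auto_cast.h): pass the phi DataType directly.
auto casted_final = cast_final_state_dygraph_function(input, dst_dtype);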