Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
黄金旺铺
Paddle
提交
3d5faa88
P
Paddle
项目概览
黄金旺铺
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
前往新版Gitcode,体验更适合开发者的 AI 搜索 >>
未验证
提交
3d5faa88
编写于
2月 15, 2023
作者:
N
niuliling123
提交者:
GitHub
2月 15, 2023
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Add Cpu tensor cast when amp_type isn't float32 (#50401)
上级
bf38175e
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
52 addition
and
10 deletion
+52
-10
paddle/fluid/eager/amp_utils.h
paddle/fluid/eager/amp_utils.h
+46
-9
paddle/fluid/eager/eager_amp_auto_cast.h
paddle/fluid/eager/eager_amp_auto_cast.h
+6
-1
未找到文件。
paddle/fluid/eager/amp_utils.h
浏览文件 @
3d5faa88
...
...
@@ -85,6 +85,39 @@ static inline paddle::experimental::DataType GetPromoteType(
return
dst_type
;
}
/// Chooses the final AMP dtype for an op based on where its inputs live.
///
/// @param op_name            Name of the op; used only in the log message.
/// @param amp_tensors_vector Input tensors of the op, grouped by slot.
/// @param amp_dtype          The AMP dtype selected for the op so far.
/// @return `amp_dtype` unchanged when it is already FLOAT32, or when at least
///         one input tensor is on a GPU, CUDA-pinned, XPU, MLU, NPU,
///         NPU-pinned or custom place; FLOAT32 otherwise.
inline paddle::experimental::DataType GetDtypeWithPlace(
    const std::string& op_name,
    const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                               kSlotSmallVectorSize>& amp_tensors_vector,
    const paddle::experimental::DataType amp_dtype) {
  // Nothing to downgrade: FLOAT32 is already the fallback type.
  if (amp_dtype == paddle::experimental::DataType::FLOAT32) {
    return amp_dtype;
  }

  // Return as soon as ANY input is on a qualifying place. A flag plus `break`
  // would only leave the inner loop, letting tensors from later slots
  // overwrite the flag and making the result depend on the order of inputs.
  for (const auto& tensors : amp_tensors_vector) {
    for (const auto& tensor : tensors) {
      const auto place = tensor.place();
      if (paddle::platform::is_gpu_place(place) ||
          paddle::platform::is_cuda_pinned_place(place) ||
          paddle::platform::is_xpu_place(place) ||
          paddle::platform::is_mlu_place(place) ||
          paddle::platform::is_npu_place(place) ||
          paddle::platform::is_npu_pinned_place(place) ||
          paddle::platform::is_custom_place(place)) {
        return amp_dtype;
      }
    }
  }

  // No input is on a qualifying place: fall back to FP32.
  VLOG(6) << "Change " << op_name << "'s AMP type from " << amp_dtype
          << " to FP32";
  return paddle::experimental::DataType::FLOAT32;
}
inline
paddle
::
experimental
::
DataType
GetAmpDestDtype
(
const
std
::
string
&
op_name
,
const
paddle
::
small_vector
<
std
::
vector
<
paddle
::
experimental
::
Tensor
>
,
...
...
@@ -95,19 +128,21 @@ inline paddle::experimental::DataType GetAmpDestDtype(
VLOG
(
6
)
<<
"AMP GetAmpDestDtype:"
<<
" op("
<<
op_name
<<
") amp_dtype("
<<
amp_dtype
<<
") amp_level("
<<
static_cast
<
int
>
(
amp_level
)
<<
")."
;
auto
return_amp_type
=
paddle
::
experimental
::
DataType
::
FLOAT16
;
if
(
amp_dtype
==
"float16"
)
{
if
(
amp_level
==
paddle
::
imperative
::
AmpLevel
::
O1
)
{
if
(
paddle
::
imperative
::
AmpOperators
::
Instance
()
.
GetMutableAllowOps
()
->
count
(
op_name
))
{
return
paddle
::
experimental
::
DataType
::
FLOAT16
;
return
_amp_type
=
paddle
::
experimental
::
DataType
::
FLOAT16
;
}
else
if
(
paddle
::
imperative
::
AmpOperators
::
Instance
()
.
GetMutableBlockOps
()
->
count
(
op_name
)
||
paddle
::
imperative
::
AmpOperators
::
Instance
()
.
GetMutableUnsupportedFp16Ops
()
->
count
(
op_name
))
{
return
paddle
::
experimental
::
DataType
::
FLOAT32
;
return
_amp_type
=
paddle
::
experimental
::
DataType
::
FLOAT32
;
}
else
{
auto
dst_type
=
GetPromoteType
(
op_name
,
amp_tensors_vector
,
...
...
@@ -118,7 +153,7 @@ inline paddle::experimental::DataType GetAmpDestDtype(
->
count
(
op_name
))
{
dst_type
=
paddle
::
experimental
::
DataType
::
FLOAT32
;
}
return
dst_type
;
return
_amp_type
=
dst_type
;
}
}
else
if
(
amp_level
==
paddle
::
imperative
::
AmpLevel
::
O2
)
{
auto
dst_type
=
paddle
::
experimental
::
DataType
::
FLOAT16
;
...
...
@@ -130,18 +165,18 @@ inline paddle::experimental::DataType GetAmpDestDtype(
->
count
(
op_name
))
{
dst_type
=
paddle
::
experimental
::
DataType
::
FLOAT32
;
}
return
dst_type
;
return
_amp_type
=
dst_type
;
}
}
else
if
(
amp_dtype
==
"bfloat16"
)
{
if
(
amp_level
==
paddle
::
imperative
::
AmpLevel
::
O1
)
{
if
(
paddle
::
imperative
::
AmpOperators
::
Instance
()
.
GetMutableAllowOps
()
->
count
(
op_name
))
{
return
paddle
::
experimental
::
DataType
::
BFLOAT16
;
return
_amp_type
=
paddle
::
experimental
::
DataType
::
BFLOAT16
;
}
else
if
(
paddle
::
imperative
::
AmpOperators
::
Instance
()
.
GetMutableBlockOps
()
->
count
(
op_name
))
{
return
paddle
::
experimental
::
DataType
::
FLOAT32
;
return
_amp_type
=
paddle
::
experimental
::
DataType
::
FLOAT32
;
}
else
{
auto
dst_type
=
GetPromoteType
(
op_name
,
...
...
@@ -153,7 +188,7 @@ inline paddle::experimental::DataType GetAmpDestDtype(
->
count
(
op_name
))
{
dst_type
=
paddle
::
experimental
::
DataType
::
FLOAT32
;
}
return
dst_type
;
return
_amp_type
=
dst_type
;
}
}
else
if
(
amp_level
==
paddle
::
imperative
::
AmpLevel
::
O2
)
{
auto
dst_type
=
paddle
::
experimental
::
DataType
::
BFLOAT16
;
...
...
@@ -165,10 +200,12 @@ inline paddle::experimental::DataType GetAmpDestDtype(
->
count
(
op_name
))
{
dst_type
=
paddle
::
experimental
::
DataType
::
FLOAT32
;
}
return
dst_type
;
return
_amp_type
=
dst_type
;
}
}
else
{
return_amp_type
=
paddle
::
experimental
::
DataType
::
FLOAT32
;
}
return
paddle
::
experimental
::
DataType
::
FLOAT32
;
return
GetDtypeWithPlace
(
op_name
,
amp_tensors_vector
,
return_amp_type
)
;
}
}
// namespace egr
paddle/fluid/eager/eager_amp_auto_cast.h
浏览文件 @
3d5faa88
...
...
@@ -22,14 +22,19 @@ static inline bool NeedCast(const paddle::experimental::Tensor& tensor,
const
paddle
::
experimental
::
DataType
&
dst_dtype
)
{
auto
place
=
tensor
.
place
();
auto
data_type
=
tensor
.
dtype
();
// Apart from the CPU check, the place conditions below must stay consistent
// with the place check in amp_utils.h
if
(
paddle
::
platform
::
is_gpu_place
(
place
)
||
paddle
::
platform
::
is_cuda_pinned_place
(
place
)
||
paddle
::
platform
::
is_xpu_place
(
place
)
||
paddle
::
platform
::
is_mlu_place
(
place
)
||
paddle
::
platform
::
is_npu_place
(
place
)
||
paddle
::
platform
::
is_npu_pinned_place
(
place
)
||
paddle
::
platform
::
is_custom_place
(
place
))
{
paddle
::
platform
::
is_custom_place
(
place
)
||
paddle
::
platform
::
is_cpu_place
(
place
))
{
// CUDAPinnedPlace is included for varbase created by dataloader
// CPU place is included for tensors on different places, e.g. when input1 is
// on CPU and input2 is on GPU
if
((
data_type
==
paddle
::
experimental
::
DataType
::
FLOAT32
||
data_type
==
paddle
::
experimental
::
DataType
::
FLOAT16
||
data_type
==
paddle
::
experimental
::
DataType
::
BFLOAT16
)
&&
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录