Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
69dd43d1
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
69dd43d1
编写于
3月 15, 2022
作者:
F
furnace
提交者:
GitHub
3月 15, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[NPU] add AMP O1 support (#40362)
* [NPU] add AMP O1 support
* [NPU] fix NOTE and warnings
上级
2c5edb4f
变更
4
隐藏空白更改
内联
并排
Showing
4 changed files
with
37 additions
and
14 deletions
+37
-14
paddle/fluid/imperative/amp_auto_cast.cc
paddle/fluid/imperative/amp_auto_cast.cc
+3
-1
paddle/fluid/pybind/op_function_generator.h
paddle/fluid/pybind/op_function_generator.h
+1
-0
python/paddle/fluid/dygraph/amp/auto_cast.py
python/paddle/fluid/dygraph/amp/auto_cast.py
+8
-3
python/paddle/fluid/dygraph/amp/loss_scaler.py
python/paddle/fluid/dygraph/amp/loss_scaler.py
+25
-10
未找到文件。
paddle/fluid/imperative/amp_auto_cast.cc
浏览文件 @
69dd43d1
...
...
@@ -209,7 +209,9 @@ inline bool NeedCast(const std::shared_ptr<VarType>& var) {
auto
data_type
=
GetDataType
<
VarType
>
(
var
);
if
(
paddle
::
platform
::
is_gpu_place
(
place
)
||
paddle
::
platform
::
is_cuda_pinned_place
(
place
)
||
paddle
::
platform
::
is_xpu_place
(
place
))
{
paddle
::
platform
::
is_xpu_place
(
place
)
||
paddle
::
platform
::
is_npu_place
(
place
)
||
paddle
::
platform
::
is_npu_pinned_place
(
place
))
{
// CUDAPinnedPlace is added for varbase created by dataloader
if
(
data_type
==
paddle
::
framework
::
proto
::
VarType
::
FP32
||
data_type
==
paddle
::
framework
::
proto
::
VarType
::
FP16
||
...
...
paddle/fluid/pybind/op_function_generator.h
浏览文件 @
69dd43d1
...
...
@@ -88,6 +88,7 @@ std::map<std::string, std::set<std::string>> op_ins_map = {
{
"nce"
,
{
"Input"
,
"Label"
,
"Weight"
,
"Bias"
,
"SampleWeight"
,
"CustomDistProbs"
,
"CustomDistAlias"
,
"CustomDistAliasProbs"
}},
{
"check_finite_and_unscale"
,
{
"X"
,
"Scale"
,
"FloatStatus"
}},
};
// NOTE(zhiqiu): Like op_ins_map.
...
...
python/paddle/fluid/dygraph/amp/auto_cast.py
浏览文件 @
69dd43d1
...
...
@@ -271,14 +271,19 @@ def amp_guard(enable=True,
"current_tracer is None, maybe it is not in imperative mode."
)
# check device_type:
# NOTE: Now, amp only support gpu for float16 and bfloat16, xpu for float16.
# NOTE: Now, amp only support gpu for float16 and bfloat16, xpu for float16, npu for float16.
# Maybe we will support cpu for bfloat16.
if
enable
and
not
(
tracer
.
_expected_place
.
is_gpu_place
()
or
tracer
.
_expected_place
.
is_xpu_place
()):
tracer
.
_expected_place
.
is_xpu_place
()
or
tracer
.
_expected_place
.
is_npu_place
()):
warnings
.
warn
(
'amp_guard can only be enabled on CUDAPlace
and X
PUPlace, current place is %s, so it makes no effect.'
'amp_guard can only be enabled on CUDAPlace
, XPUPlace, and N
PUPlace, current place is %s, so it makes no effect.'
%
tracer
.
_expected_place
)
enable
=
False
# For npu:
if
tracer
.
_expected_place
.
is_npu_place
()
and
(
dtype
==
'bfloat16'
):
warnings
.
warn
(
'NPUPlace only support float16 amp.'
)
enable
=
False
# For xpu:
if
tracer
.
_expected_place
.
is_xpu_place
()
and
(
dtype
==
'bfloat16'
):
warnings
.
warn
(
'XPUPlace only support float16 amp.'
)
...
...
python/paddle/fluid/dygraph/amp/loss_scaler.py
浏览文件 @
69dd43d1
...
...
@@ -105,9 +105,10 @@ class AmpScaler(object):
"current_tracer is None, maybe it is not in imperative mode."
)
if
enable
and
not
(
tracer
.
_expected_place
.
is_gpu_place
()
or
tracer
.
_expected_place
.
is_xpu_place
()):
tracer
.
_expected_place
.
is_xpu_place
()
or
tracer
.
_expected_place
.
is_npu_place
()):
warnings
.
warn
(
'AmpScaler can only be enabled on CUDAPlace
and X
PUPlace, current place is %s, so it makes no effect.'
'AmpScaler can only be enabled on CUDAPlace
, XPUPlace and N
PUPlace, current place is %s, so it makes no effect.'
%
tracer
.
_expected_place
)
enable
=
False
...
...
@@ -286,14 +287,28 @@ class AmpScaler(object):
)
and
(
param
.
_grad_ivar
().
dtype
==
core
.
VarDesc
.
VarType
.
FP32
)
]
if
len
(
param_grads_fp16
):
_C_ops
.
check_finite_and_unscale
(
param_grads_fp16
,
self
.
_scale
,
param_grads_fp16
,
self
.
_temp_found_inf_fp16
)
if
len
(
param_grads_fp32
):
_C_ops
.
check_finite_and_unscale
(
param_grads_fp32
,
self
.
_scale
,
param_grads_fp32
,
self
.
_temp_found_inf_fp32
)
if
core
.
is_compiled_with_npu
():
float_status
=
_C_ops
.
alloc_float_status
()
_C_ops
.
clear_float_status
(
float_status
,
float_status
)
if
len
(
param_grads_fp16
):
_C_ops
.
check_finite_and_unscale
(
param_grads_fp16
,
self
.
_scale
,
float_status
,
param_grads_fp16
,
self
.
_temp_found_inf_fp16
)
if
len
(
param_grads_fp32
):
_C_ops
.
check_finite_and_unscale
(
param_grads_fp32
,
self
.
_scale
,
float_status
,
param_grads_fp32
,
self
.
_temp_found_inf_fp32
)
else
:
if
len
(
param_grads_fp16
):
_C_ops
.
check_finite_and_unscale
(
param_grads_fp16
,
self
.
_scale
,
param_grads_fp16
,
self
.
_temp_found_inf_fp16
)
if
len
(
param_grads_fp32
):
_C_ops
.
check_finite_and_unscale
(
param_grads_fp32
,
self
.
_scale
,
param_grads_fp32
,
self
.
_temp_found_inf_fp32
)
if
len
(
param_grads_fp16
)
and
len
(
param_grads_fp32
):
self
.
_found_inf
=
self
.
_temp_found_inf_fp16
or
self
.
_temp_found_inf_fp32
elif
len
(
param_grads_fp16
):
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录