Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
d2b31a14
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
d2b31a14
编写于
4月 26, 2021
作者:
L
Leo Chen
提交者:
GitHub
4月 26, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[AMP] Autocast to fp32 for op has no fp16 kernel (#32543)
* skip op has no fp16 kernel * add ut
上级
756f4639
变更
4
显示空白变更内容
内联
并排
Showing
4 changed files
with
56 additions
and
3 deletions
+56
-3
paddle/fluid/imperative/amp_auto_cast.cc
paddle/fluid/imperative/amp_auto_cast.cc
+35
-2
paddle/fluid/imperative/amp_auto_cast.h
paddle/fluid/imperative/amp_auto_cast.h
+6
-0
paddle/fluid/pybind/imperative.cc
paddle/fluid/pybind/imperative.cc
+1
-1
python/paddle/fluid/tests/unittests/test_imperative_auto_mixed_precision.py
...d/tests/unittests/test_imperative_auto_mixed_precision.py
+14
-0
未找到文件。
paddle/fluid/imperative/amp_auto_cast.cc
浏览文件 @
d2b31a14
...
@@ -26,7 +26,24 @@ class VarBase;
...
@@ -26,7 +26,24 @@ class VarBase;
AmpOperators
::
AmpOperators
()
AmpOperators
::
AmpOperators
()
:
allow_ops_
(
new
std
::
unordered_set
<
std
::
string
>
()),
:
allow_ops_
(
new
std
::
unordered_set
<
std
::
string
>
()),
block_ops_
(
new
std
::
unordered_set
<
std
::
string
>
())
{}
block_ops_
(
new
std
::
unordered_set
<
std
::
string
>
()),
unsupported_fp16_ops_
(
new
std
::
unordered_set
<
std
::
string
>
())
{
auto
&
all_kernels
=
framework
::
OperatorWithKernel
::
AllOpKernels
();
auto
fp16_dtype
=
framework
::
proto
::
VarType
::
FP16
;
for
(
auto
it
=
all_kernels
.
begin
();
it
!=
all_kernels
.
end
();
it
++
)
{
bool
supported
=
false
;
for
(
auto
&
kernel_type
:
it
->
second
)
{
if
(
platform
::
is_gpu_place
(
kernel_type
.
first
.
place_
)
&&
kernel_type
.
first
.
data_type_
==
fp16_dtype
)
{
supported
=
true
;
}
}
if
(
!
supported
)
{
unsupported_fp16_ops_
->
insert
(
it
->
first
);
}
}
}
AmpOperators
::~
AmpOperators
()
{}
AmpOperators
::~
AmpOperators
()
{}
AmpOperators
&
AmpOperators
::
Instance
()
{
AmpOperators
&
AmpOperators
::
Instance
()
{
...
@@ -44,16 +61,26 @@ AmpOperators::GetMutableBlockOps() {
...
@@ -44,16 +61,26 @@ AmpOperators::GetMutableBlockOps() {
return
block_ops_
;
return
block_ops_
;
}
}
std
::
shared_ptr
<
std
::
unordered_set
<
std
::
string
>>
AmpOperators
::
GetMutableUnsupportedFp16Ops
()
{
return
unsupported_fp16_ops_
;
}
std
::
ostream
&
operator
<<
(
std
::
ostream
&
os
,
AmpOperators
&
ops
)
{
std
::
ostream
&
operator
<<
(
std
::
ostream
&
os
,
AmpOperators
&
ops
)
{
os
<<
"allow ops: "
;
os
<<
"allow ops: "
;
auto
allow_ops
=
ops
.
GetMutableAllowOps
();
auto
allow_ops
=
ops
.
GetMutableAllowOps
();
std
::
copy
((
*
allow_ops
).
begin
(),
(
*
allow_ops
).
end
(),
std
::
copy
((
*
allow_ops
).
begin
(),
(
*
allow_ops
).
end
(),
std
::
ostream_iterator
<
std
::
string
>
(
os
,
" "
));
std
::
ostream_iterator
<
std
::
string
>
(
os
,
" "
));
os
<<
"
;
"
;
os
<<
"
\n
"
;
os
<<
"block ops: "
;
os
<<
"block ops: "
;
auto
block_ops
=
ops
.
GetMutableBlockOps
();
auto
block_ops
=
ops
.
GetMutableBlockOps
();
std
::
copy
((
*
block_ops
).
begin
(),
(
*
block_ops
).
end
(),
std
::
copy
((
*
block_ops
).
begin
(),
(
*
block_ops
).
end
(),
std
::
ostream_iterator
<
std
::
string
>
(
os
,
" "
));
std
::
ostream_iterator
<
std
::
string
>
(
os
,
" "
));
os
<<
"
\n
"
;
os
<<
"unsupported fp16 ops: "
;
auto
unsupported_fp16_ops
=
ops
.
GetMutableUnsupportedFp16Ops
();
std
::
copy
((
*
unsupported_fp16_ops
).
begin
(),
(
*
unsupported_fp16_ops
).
end
(),
std
::
ostream_iterator
<
std
::
string
>
(
os
,
" "
));
return
os
;
return
os
;
}
}
...
@@ -156,6 +183,12 @@ NameVarBaseMap AutoCastInputs(const std::string& op_type,
...
@@ -156,6 +183,12 @@ NameVarBaseMap AutoCastInputs(const std::string& op_type,
return
new_ins
;
return
new_ins
;
}
else
{
}
else
{
auto
dst_type
=
GetPromoteType
(
ins
);
auto
dst_type
=
GetPromoteType
(
ins
);
// NOTE(zhiqiu): if the op has no fp16 kernel, fall back to fp32.
if
(
dst_type
==
framework
::
proto
::
VarType
::
FP16
&&
AmpOperators
::
Instance
().
GetMutableUnsupportedFp16Ops
()
->
count
(
op_type
))
{
dst_type
=
framework
::
proto
::
VarType
::
FP32
;
}
for
(
auto
&
pair
:
new_ins
)
{
for
(
auto
&
pair
:
new_ins
)
{
// NOTE(zhiqiu): batch_norm and layer_norm support only input x is fp16.
// NOTE(zhiqiu): batch_norm and layer_norm support only input x is fp16.
if
((
op_type
==
"batch_norm"
||
op_type
==
"layer_norm"
)
&&
if
((
op_type
==
"batch_norm"
||
op_type
==
"layer_norm"
)
&&
...
...
paddle/fluid/imperative/amp_auto_cast.h
浏览文件 @
d2b31a14
...
@@ -40,6 +40,9 @@ class AmpOperators {
...
@@ -40,6 +40,9 @@ class AmpOperators {
std
::
shared_ptr
<
std
::
unordered_set
<
std
::
string
>>
GetMutableBlockOps
();
std
::
shared_ptr
<
std
::
unordered_set
<
std
::
string
>>
GetMutableBlockOps
();
std
::
shared_ptr
<
std
::
unordered_set
<
std
::
string
>>
GetMutableUnsupportedFp16Ops
();
private:
private:
AmpOperators
();
// forbid calling default constructor
AmpOperators
();
// forbid calling default constructor
...
@@ -50,6 +53,9 @@ class AmpOperators {
...
@@ -50,6 +53,9 @@ class AmpOperators {
// The set of ops that support fp16 calculation and are considered numerically
// The set of ops that support fp16 calculation and are considered numerically
// dangerous and whose effects may also be observed in downstream ops.
// dangerous and whose effects may also be observed in downstream ops.
std
::
shared_ptr
<
std
::
unordered_set
<
std
::
string
>>
block_ops_
;
std
::
shared_ptr
<
std
::
unordered_set
<
std
::
string
>>
block_ops_
;
// The set of ops that have no fp16 CUDA kernel.
std
::
shared_ptr
<
std
::
unordered_set
<
std
::
string
>>
unsupported_fp16_ops_
;
};
};
std
::
ostream
&
operator
<<
(
std
::
ostream
&
os
,
AmpOperators
&
ops
);
std
::
ostream
&
operator
<<
(
std
::
ostream
&
os
,
AmpOperators
&
ops
);
...
...
paddle/fluid/pybind/imperative.cc
浏览文件 @
d2b31a14
...
@@ -1488,7 +1488,7 @@ void BindImperative(py::module *m_ptr) {
...
@@ -1488,7 +1488,7 @@ void BindImperative(py::module *m_ptr) {
allow_ops
);
allow_ops
);
imperative
::
AmpOperators
::
Instance
().
GetMutableBlockOps
()
->
swap
(
imperative
::
AmpOperators
::
Instance
().
GetMutableBlockOps
()
->
swap
(
block_ops
);
block_ops
);
VLOG
(
4
)
<<
"AMP operators changed, "
VLOG
(
5
)
<<
"AMP operators changed, "
<<
imperative
::
AmpOperators
::
Instance
();
<<
imperative
::
AmpOperators
::
Instance
();
})
})
.
def
(
"_get_amp_op_list"
,
.
def
(
"_get_amp_op_list"
,
...
...
python/paddle/fluid/tests/unittests/test_imperative_auto_mixed_precision.py
浏览文件 @
d2b31a14
...
@@ -106,6 +106,20 @@ class TestAutoCast(unittest.TestCase):
...
@@ -106,6 +106,20 @@ class TestAutoCast(unittest.TestCase):
self
.
assertRaises
(
ValueError
,
func
)
self
.
assertRaises
(
ValueError
,
func
)
def
test_amp_guard_upsupported_fp16_op
(
self
):
data
=
np
.
random
.
uniform
(
-
1
,
1
,
[
10
,
3
,
32
,
32
]).
astype
(
'float32'
)
with
fluid
.
dygraph
.
guard
():
conv2d
=
fluid
.
dygraph
.
Conv2D
(
3
,
2
,
3
,
bias_attr
=
False
,
act
=
None
)
data
=
fluid
.
dygraph
.
to_variable
(
data
)
with
fluid
.
dygraph
.
amp_guard
(
True
):
out_fp16
=
conv2d
(
data
)
out_fp32
=
paddle
.
expand_as
(
out_fp16
,
out_fp16
)
# expand_as_v2 has no fp16 kernel
self
.
assertTrue
(
data
.
dtype
==
fluid
.
core
.
VarDesc
.
VarType
.
FP32
)
self
.
assertTrue
(
out_fp16
.
dtype
==
fluid
.
core
.
VarDesc
.
VarType
.
FP16
)
self
.
assertTrue
(
out_fp32
.
dtype
==
fluid
.
core
.
VarDesc
.
VarType
.
FP32
)
class
TestAmpScaler
(
unittest
.
TestCase
):
class
TestAmpScaler
(
unittest
.
TestCase
):
def
test_scale
(
self
):
def
test_scale
(
self
):
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录