Commit 7043b8cf in Paddle (fork of PaddlePaddle/Paddle)
Authored Jan 19, 2021 by Leo Chen; committed by GitHub on Jan 19, 2021
support layer_norm fp16 in dygraph amp (#30430)
* support layer_norm fp16 in dygraph amp
* add ut
* refine code
Parent: 28eb7b65

Showing 4 changed files with 84 additions and 48 deletions (+84 −48)
paddle/fluid/imperative/amp_auto_cast.cc   +43 −28
paddle/fluid/imperative/amp_auto_cast.h    +4 −2
paddle/fluid/pybind/imperative.cc          +21 −18
python/paddle/fluid/tests/unittests/test_imperative_auto_mixed_precision.py   +16 −0
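What the change enables, in user-facing terms: once layer_norm is whitelisted, dygraph auto mixed precision runs it in fp16. A minimal usage sketch mirroring the new unit test at the end of this diff (assuming a CUDA build of a Paddle 2.0-era release):

    import paddle
    import paddle.fluid as fluid

    # Sketch only: run LayerNorm in fp16 under dygraph AMP by whitelisting the
    # layer_norm op. Mirrors the TestLayerNormFp16 case added in this commit.
    if fluid.is_compiled_with_cuda():
        with fluid.dygraph.guard(fluid.CUDAPlace(0)):
            x = paddle.rand([2, 2, 2, 3])
            layer_norm = paddle.nn.LayerNorm(x.shape[1:])
            with paddle.amp.auto_cast(custom_white_list=['layer_norm']):
                out = layer_norm(x)  # computed in float16
            print(out.dtype)  # expected: VarType.FP16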
paddle/fluid/imperative/amp_auto_cast.cc

@@ -14,6 +14,7 @@
 #include "paddle/fluid/imperative/amp_auto_cast.h"
+#include <algorithm>
 #include <memory>
 #include <string>
 #include <utility>
...
@@ -35,14 +36,29 @@ AmpOperators& AmpOperators::Instance() {
   return instance;
 }
 
-std::shared_ptr<std::unordered_set<std::string>> AmpOperators::GetAllowOps() {
+std::shared_ptr<std::unordered_set<std::string>>
+AmpOperators::GetMutableAllowOps() {
   return allow_ops_;
 }
 
-std::shared_ptr<std::unordered_set<std::string>> AmpOperators::GetBlockOps() {
+std::shared_ptr<std::unordered_set<std::string>>
+AmpOperators::GetMutableBlockOps() {
   return block_ops_;
 }
 
+std::ostream& operator<<(std::ostream& os, AmpOperators& ops) {
+  os << "allow ops: ";
+  auto allow_ops = ops.GetMutableAllowOps();
+  std::copy((*allow_ops).begin(), (*allow_ops).end(),
+            std::ostream_iterator<std::string>(os, " "));
+  os << "; ";
+  os << "block ops: ";
+  auto block_ops = ops.GetMutableBlockOps();
+  std::copy((*block_ops).begin(), (*block_ops).end(),
+            std::ostream_iterator<std::string>(os, " "));
+  return os;
+}
+
 inline std::string GetDtypeStr(
     const std::shared_ptr<imperative::VarBase>& var) {
   return framework::DataTypeToString(var->DataType());
...
@@ -115,51 +131,50 @@ static inline framework::proto::VarType::Type GetPromoteType(
 NameVarBaseMap AutoCastInputs(const std::string& op_type,
                               const NameVarBaseMap& ins) {
-  NameVarBaseMap new_ins = {};
-  if (AmpOperators::Instance().GetAllowOps()->count(op_type)) {
-    for (const auto& pair : ins) {
+  NameVarBaseMap new_ins(ins);
+  if (AmpOperators::Instance().GetMutableAllowOps()->count(op_type)) {
+    for (auto& pair : new_ins) {
+      // NOTE(zhiqiu): batch_norm and layer_norm support only input x is fp16.
+      if ((op_type == "batch_norm" || op_type == "layer_norm") &&
+          pair.first != "X") {
+        continue;
+      }
+
       VLOG(5) << "Op(" << op_type << "): Cast " << pair.first << " from "
               << GetDtypeStr(*pair.second.cbegin()) << " to float16";
-      for (const auto& var : pair.second) {
-        auto new_var = CastToFP16(var);
-        new_ins[pair.first].emplace_back(new_var);
+      for (auto& var : pair.second) {
+        var = CastToFP16(var);
       }
     }
     return new_ins;
-  } else if (AmpOperators::Instance().GetBlockOps()->count(op_type)) {
-    for (const auto& pair : ins) {
+  } else if (AmpOperators::Instance().GetMutableBlockOps()->count(op_type)) {
+    for (auto& pair : new_ins) {
       VLOG(5) << "Op(" << op_type << "): Cast " << pair.first << " from "
              << GetDtypeStr(*pair.second.cbegin()) << " to float";
-      for (const auto& var : pair.second) {
-        auto new_var = CastToFP32(var);
-        new_ins[pair.first].emplace_back(new_var);
+      for (auto& var : pair.second) {
        var = CastToFP32(var);
       }
     }
     return new_ins;
   } else {
     auto dst_type = GetPromoteType(ins);
-    for (const auto& pair : ins) {
+    for (auto& pair : new_ins) {
+      // NOTE(zhiqiu): batch_norm and layer_norm support only input x is fp16.
+      if ((op_type == "batch_norm" || op_type == "layer_norm") &&
+          pair.first == "X" && dst_type == framework::proto::VarType::FP32) {
+        continue;
+      }
       VLOG(5) << "Op(" << op_type << "): Cast " << pair.first << " from "
               << GetDtypeStr(*pair.second.cbegin()) << " to "
               << framework::DataTypeToString(dst_type);
-      for (const auto& var : pair.second) {
-        // NOTE(zhiqiu): Conv + BN always occur together, we needn't
-        // cast X of batch_norm to FP32, which is produced by conv as FP16 type.
-        if (op_type == "batch_norm" && pair.first == "X" &&
-            dst_type == framework::proto::VarType::FP32) {
-          new_ins[pair.first].emplace_back(var);
-          continue;
-        }
-        auto new_var = dst_type == framework::proto::VarType::FP32
-                           ? CastToFP32(var)
-                           : CastToFP16(var);
-        new_ins[pair.first].emplace_back(new_var);
+      for (auto& var : pair.second) {
+        var = (dst_type == framework::proto::VarType::FP32 ? CastToFP32(var)
+                                                           : CastToFP16(var));
       }
     }
     return new_ins;
   }
-  return ins;
+  return new_ins;
 }
 
 }  // namespace imperative
...
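The net effect of the AutoCastInputs change above: for whitelisted batch_norm and layer_norm, only the "X" input is cast to fp16, so Scale and Bias are still fed as fp32 and the layer's parameters are never modified. A sketch of that observable behavior, extending the earlier usage example (assumptions: a CUDA build, and that paddle.nn.LayerNorm exposes its affine parameters as weight and bias, as in Paddle 2.0-era releases):

    import paddle
    import paddle.fluid as fluid

    # Sketch only: the per-op cast touches the "X" input, not the parameters,
    # so the fp16 output coexists with fp32 weight/bias on the layer.
    if fluid.is_compiled_with_cuda():
        with fluid.dygraph.guard(fluid.CUDAPlace(0)):
            x = paddle.rand([2, 2, 2, 3])
            layer_norm = paddle.nn.LayerNorm(x.shape[1:])
            with paddle.amp.auto_cast(custom_white_list=['layer_norm']):
                out = layer_norm(x)
            assert out.dtype == fluid.core.VarDesc.VarType.FP16
            assert layer_norm.weight.dtype == fluid.core.VarDesc.VarType.FP32
            assert layer_norm.bias.dtype == fluid.core.VarDesc.VarType.FP32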
paddle/fluid/imperative/amp_auto_cast.h

@@ -36,9 +36,9 @@ class AmpOperators {
   static AmpOperators& Instance();
 
-  std::shared_ptr<std::unordered_set<std::string>> GetAllowOps();
+  std::shared_ptr<std::unordered_set<std::string>> GetMutableAllowOps();
 
-  std::shared_ptr<std::unordered_set<std::string>> GetBlockOps();
+  std::shared_ptr<std::unordered_set<std::string>> GetMutableBlockOps();
 
  private:
   AmpOperators();  // forbid calling default constructor
...
@@ -52,6 +52,8 @@ class AmpOperators {
   std::shared_ptr<std::unordered_set<std::string>> block_ops_;
 };
 
+std::ostream& operator<<(std::ostream& os, AmpOperators& ops);
+
 // NOTE(zhiqiu): AutoCastGuard is used for RAII.
 class AutoCastGuard {
  public:
...
paddle/fluid/pybind/imperative.cc

@@ -1257,27 +1257,30 @@ void BindImperative(py::module *m_ptr) {
            py::return_value_policy::reference)
       .def("_generate_unique_name", &imperative::Tracer::GenerateUniqueName,
            py::arg("key") = "dygraph_tmp")
-      .def("_set_amp_op_list",
-           [](imperative::Tracer &self,
-              std::unordered_set<std::string> &allow_ops,
-              std::unordered_set<std::string> &block_ops) {
-             // NOTE(zhiqiu): The automatic conversion in pybind11 between
-             // c++
-             // STL and python set/list/dict involve a copy operation that
-             // prevents pass-by-reference semantics, so it is ok to swap.
-             // The reaseon why not directly pass
-             // std::shared_ptr<std::unordered_set<std::string>>
-             // is that pybind11 forbid shared_ptr<T> where T is not custom
-             // type.
-             imperative::AmpOperators::Instance().GetAllowOps()->swap(allow_ops);
-             imperative::AmpOperators::Instance().GetBlockOps()->swap(block_ops);
-           })
+      .def("_set_amp_op_list",
+           [](imperative::Tracer &self,
+              std::unordered_set<std::string> &allow_ops,
+              std::unordered_set<std::string> &block_ops) {
+             // NOTE(zhiqiu): The automatic conversion in pybind11 between
+             // c++
+             // STL and python set/list/dict involve a copy operation that
+             // prevents pass-by-reference semantics, so it is ok to swap.
+             // The reaseon why not directly pass
+             // std::shared_ptr<std::unordered_set<std::string>>
+             // is that pybind11 forbid shared_ptr<T> where T is not custom
+             // type.
+             imperative::AmpOperators::Instance().GetMutableAllowOps()->swap(
+                 allow_ops);
+             imperative::AmpOperators::Instance().GetMutableBlockOps()->swap(
+                 block_ops);
+             VLOG(4) << "AMP operators changed, "
+                     << imperative::AmpOperators::Instance();
+           })
       .def("_get_amp_op_list",
            [](imperative::Tracer &self) {
              return std::make_tuple(
-                 *(imperative::AmpOperators::Instance().GetAllowOps()),
-                 *(imperative::AmpOperators::Instance().GetBlockOps()));
+                 *(imperative::AmpOperators::Instance().GetMutableAllowOps()),
+                 *(imperative::AmpOperators::Instance().GetMutableBlockOps()));
            })
       .def("trace",
            [](imperative::Tracer &self, const std::string &type,
...
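These bindings are what paddle.amp.auto_cast ultimately drives: entering the context swaps the custom white/black lists into AmpOperators via _set_amp_op_list, and _get_amp_op_list returns the current (allow_ops, block_ops) pair. A rough sketch of inspecting that state (assumptions: the internal helper fluid.framework._dygraph_tracer() is available and auto_cast routes its custom lists through _set_amp_op_list; both are private interfaces and may differ between releases):

    import paddle
    import paddle.fluid as fluid

    # Sketch only: _get_amp_op_list returns an (allow_ops, block_ops) tuple of
    # Python sets, per the pybind binding above. Internal API, may change.
    if fluid.is_compiled_with_cuda():
        with fluid.dygraph.guard(fluid.CUDAPlace(0)):
            tracer = fluid.framework._dygraph_tracer()  # assumed internal helper
            with paddle.amp.auto_cast(custom_white_list=['layer_norm']):
                allow_ops, block_ops = tracer._get_amp_op_list()
                print('layer_norm' in allow_ops)  # expected: True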
python/paddle/fluid/tests/unittests/test_imperative_auto_mixed_precision.py

@@ -389,5 +389,21 @@ class TestResnet(unittest.TestCase):
         self.assertTrue(np.allclose(out_fp32[0], out_amp[0], atol=1.e-2))
 
 
+class TestLayerNormFp16(unittest.TestCase):
+    r''' layer_norm and batch_norm support mixed inputs, i.e., only input x is fp16
+    and other params are fp32.
+    '''
+
+    def test_layer_norm_fp16(self):
+        if fluid.is_compiled_with_cuda():
+            with fluid.dygraph.guard(fluid.CUDAPlace(0)):
+                x = paddle.rand([2, 2, 2, 3])
+                layer_norm = paddle.nn.LayerNorm(x.shape[1:])
+                with paddle.amp.auto_cast(custom_white_list=['layer_norm']):
+                    out = layer_norm(x)
+
+                self.assertTrue(out.dtype == fluid.core.VarDesc.VarType.FP16)
+
+
 if __name__ == '__main__':
     unittest.main()