Commit c47d6729 (unverified)
Authored Feb 10, 2022 by sneaxiy; committed via GitHub on Feb 10, 2022
Parent: 32d79bb9

Add _get_parameter method to Lamb optimizer (#39416)

* add _get_parameter func to lamb
* remove duplicate code
Showing 2 changed files with 53 additions and 7 deletions (+53 −7):

* python/paddle/fluid/tests/unittests/test_lambv2_op.py (+30 −5)
* python/paddle/optimizer/lamb.py (+23 −2)
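In short, this change gives the `Lamb` optimizer a private `_get_parameter(name, scope=None)` helper that looks a parameter up by name in the given scope (falling back to the global scope) and returns a `(param, master_weight)` pair, where `master_weight` is the FP32 master copy recorded during multi-precision training, or `None` if no master copy exists. A new `_used_master_weights` dict tracks which master weight belongs to which parameter, and the updated unit test exercises both the FP16 (multi-precision) and FP32 paths.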
python/paddle/fluid/tests/unittests/test_lambv2_op.py

```diff
@@ -195,32 +195,57 @@ class TestLambOpMultiPrecision(unittest.TestCase):
                 hidden = linear(x)
                 loss = paddle.mean(hidden)
 
-            optimizer = paddle.optimizer.Lamb(learning_rate=1e-3)
-            optimizer._multi_precision = multi_precision
+            original_optimizer = paddle.optimizer.Lamb(learning_rate=1e-3)
+            original_optimizer._multi_precision = multi_precision
             if multi_precision:
                 optimizer = paddle.static.amp.decorate(
-                    optimizer, use_pure_fp16=True, use_fp16_guard=True)
+                    original_optimizer, use_pure_fp16=True, use_fp16_guard=True)
+            else:
+                optimizer = original_optimizer
             optimizer.minimize(loss)
 
         weight, bias = linear.weight, linear.bias
-        scope = paddle.static.Scope()
         exe = paddle.static.Executor(place)
         scope = paddle.static.Scope()
         x = main_prog.global_block().var(x.name)
         if x.dtype == core.VarDesc.VarType.FP16:
             x_np = x_np.astype(np.float16)
 
+        def get_parameter(var):
+            name = var if isinstance(var, (str, bytes)) else var.name
+            params = original_optimizer._get_parameter(name, scope)
+            assert isinstance(params, (list, tuple))
+            params = list(params)
+            assert len(params) == 2
+            if multi_precision:
+                params[0] = np.array(params[0])
+                params[1] = np.array(params[1])
+                self.assertTrue(
+                    np.array_equal(params[0], params[1].astype(np.float16)))
+                return params[0].astype(np.float32)
+            else:
+                self.assertTrue(params[0] is not None)
+                self.assertTrue(params[1] is None)
+                params[0] = np.array(params[0])
+                return params[0]
+
         with paddle.static.scope_guard(scope):
             exe.run(startup_prog)
             if multi_precision:
                 optimizer.amp_init(place)
 
             weight_np, bias_np = None, None
             for i in range(n):
                 feed_dict = {x.name: x_np}
                 weight_np, bias_np = exe.run(main_prog,
                                              feed=feed_dict,
                                              fetch_list=[weight, bias])
-            return weight_np.astype('float32'), bias_np.astype('float32')
+            weight_np = weight_np.astype('float32')
+            bias_np = bias_np.astype('float32')
+            self.assertTrue(np.array_equal(weight_np, get_parameter(weight)))
+            self.assertTrue(np.array_equal(bias_np, get_parameter(bias)))
+            return weight_np, bias_np
 
     @switch_to_static_graph
     def test_main(self):
```
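Two details worth noting in the test. First, it keeps a direct handle (`original_optimizer`) to the underlying `Lamb` instance: with `multi_precision` on, `paddle.static.amp.decorate` returns a wrapped optimizer, and `_get_parameter` with its `_used_master_weights` bookkeeping lives on the inner `Lamb` object, so the test calls the helper on the original rather than on the AMP wrapper. Second, the deleted `scope = paddle.static.Scope()` line is the duplicate mentioned in the commit message; the remaining one is created before `get_parameter` is defined, so the closure and the later `scope_guard` share the same scope.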
python/paddle/optimizer/lamb.py

```diff
@@ -20,6 +20,7 @@ from ..fluid import layers
 from ..fluid import unique_name
 from ..fluid.layer_helper import LayerHelper
 from paddle import _C_ops
+from paddle.fluid.executor import global_scope
 
 __all__ = []
 
@@ -131,9 +132,25 @@ class Lamb(Optimizer):
             'exclude_from_weight_decay_fn': exclude_from_weight_decay_fn,
         }
         self._master_weights = {}
+        self._used_master_weights = {}
         # TODO(zengjinle): expose API as soon as possible
         self._multi_precision = False
 
+    def _get_parameter(self, name, scope=None):
+        if scope is None:
+            scope = global_scope()
+
+        p_t = scope.find_var(name).get_tensor()
+
+        master_name = self._used_master_weights.get(name)
+        if master_name is not None:
+            master_p_t = scope.find_var(master_name).get_tensor()
+            assert master_p_t._dtype() != p_t._dtype()
+            assert master_p_t.shape() == p_t.shape()
+        else:
+            master_p_t = None
+        return p_t, master_p_t
+
     def _create_master_weight(self, param):
         assert self._multi_precision
         if param.name in self._master_weights:
@@ -243,8 +260,12 @@ class Lamb(Optimizer):
         find_master = self._multi_precision and param_and_grad[
             0].dtype == core.VarDesc.VarType.FP16
-        master_weight = self._master_weights[param_and_grad[0]
-                                             .name] if find_master else None
+        p_name = param_and_grad[0].name
+        if find_master:
+            master_weight = self._master_weights[p_name]
+            self._used_master_weights[p_name] = master_weight.name
+        else:
+            master_weight = None
 
         found_inf = self._get_auxiliary_var('found_inf')
 
         if framework.in_dygraph_mode():
```
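For illustration, here is a minimal sketch of calling the new helper after this change. It mirrors the unit test rather than documenting a public contract (`_get_parameter` is a private API, and the network shape and CPU place below are invented for the example):

```python
import numpy as np
import paddle

paddle.enable_static()

# Build a small static-graph program and minimize it with Lamb.
main_prog = paddle.static.Program()
startup_prog = paddle.static.Program()
with paddle.static.program_guard(main_prog, startup_prog):
    x = paddle.static.data(name='x', shape=[None, 10], dtype='float32')
    linear = paddle.nn.Linear(10, 2)
    loss = paddle.mean(linear(x))
    lamb = paddle.optimizer.Lamb(learning_rate=1e-3)
    lamb.minimize(loss)

scope = paddle.static.Scope()
exe = paddle.static.Executor(paddle.CPUPlace())
with paddle.static.scope_guard(scope):
    exe.run(startup_prog)
    # Look the weight up by name in the scope the program ran in.
    param, master = lamb._get_parameter(linear.weight.name, scope)
    # Without multi-precision training there is no FP32 master copy.
    assert master is None
    print(np.array(param).shape)  # (10, 2)
```

With `_multi_precision = True` and an AMP-decorated run (as in the test), the second element would instead be the FP32 master tensor, with the same shape as the FP16 parameter but a different dtype.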