Unverified commit d6f72c4f, authored on Mar 26, 2020 by Aurelius84; committed by GitHub on Mar 26, 2020.
Add parameter(learning_rate) in NoamDecay (#23156)
* Add parameter(learning_rate) in NoamDecay test=develop
Parent: af926306
Showing 3 changed files with 66 additions and 8 deletions (+66, -8).
python/paddle/fluid/dygraph/learning_rate_scheduler.py  +14 -3
python/paddle/fluid/layers/learning_rate_scheduler.py  +13 -5
python/paddle/fluid/tests/unittests/test_learning_rate_scheduler.py  +39 -0
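For reference, the change multiplies the original Noam schedule by a new learning_rate factor. The numpy computation below is a minimal sketch of the updated rule, reusing the illustrative hyper-parameter values from the docstring example further down in this diff (base_lr plays the role of the new learning_rate argument; none of these values are prescribed by the API):

import numpy as np

# Illustration values taken from the docstring example in this commit.
base_lr = 0.01        # the new learning_rate factor
d_model = 2
current_steps = 20
warmup_steps = 200

# decayed_lr = learning_rate * d_model**-0.5 * min(step**-0.5, step * warmup_steps**-1.5)
lr_value = base_lr * np.power(d_model, -0.5) * np.min([
    np.power(current_steps, -0.5),
    np.power(warmup_steps, -1.5) * current_steps])
print(lr_value)  # 0.01 * 0.7071 * 0.007071 = 5e-05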
python/paddle/fluid/dygraph/learning_rate_scheduler.py @ d6f72c4f
@@ -517,7 +517,7 @@ class NoamDecay(LearningRateDecay):
     .. math::

-        decayed\_learning\_rate = d_{model}^{-0.5} * min(global\_step^{-0.5}, global\_step * warmup\_steps^{-1.5})
+        decayed\_learning\_rate = learning\_rate * d_{model}^{-0.5} * min(global\_step^{-0.5}, global\_step * warmup\_steps^{-1.5})

     Please reference `attention is all you need <https://arxiv.org/pdf/1706.03762.pdf>`_

@@ -531,6 +531,9 @@ class NoamDecay(LearningRateDecay):
             The default value is 1.
         dtype(str, optional): The data type used to create the learning rate variable. The data type can be set as
             'float32', 'float64'. The default value is 'float32'.
+        learning_rate(Variable|float|int): The initial learning rate. If the type
+            is Variable, it's a tensor with shape [1], the data type can be
+            float32 or float64. It also can be set to python int number. Default 1.0

     Returns:
         None.

@@ -550,8 +553,15 @@ class NoamDecay(LearningRateDecay):
                 parameter_list = emb.parameters())
     """

-    def __init__(self, d_model, warmup_steps, begin=1, step=1, dtype='float32'):
+    def __init__(self,
+                 d_model,
+                 warmup_steps,
+                 begin=1,
+                 step=1,
+                 dtype='float32',
+                 learning_rate=1.0):
         super(NoamDecay, self).__init__(begin, step, dtype)
+        self.learning_rate = learning_rate
         self.d_model = d_model
         self.warmup_steps = warmup_steps

@@ -559,7 +569,8 @@ class NoamDecay(LearningRateDecay):
         from .. import layers
         a = self.create_lr_var(self.step_num**-0.5)
         b = self.create_lr_var((self.warmup_steps**-1.5) * self.step_num)
-        lr_value = (self.d_model**-0.5) * layers.elementwise_min(a, b)
+        lr_value = self.learning_rate * (self.d_model
+                                         **-0.5) * layers.elementwise_min(a, b)
         return lr_value
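With the new keyword in place, the dygraph scheduler can be seeded with a base learning rate and handed to an optimizer as before. A minimal sketch under the fluid 1.x dygraph API; the Embedding layer, the SGD optimizer, and the concrete values are only illustrative and are not part of this commit:

import paddle.fluid as fluid

with fluid.dygraph.guard():
    emb = fluid.dygraph.Embedding([10, 10])   # placeholder parameters to schedule
    sgd = fluid.optimizer.SGD(
        learning_rate=fluid.dygraph.NoamDecay(
            d_model=0.01, warmup_steps=100, learning_rate=2.0),
        parameter_list=emb.parameters())
    # sgd.minimize(loss) would now follow the scaled Noam schedule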
python/paddle/fluid/layers/learning_rate_scheduler.py @ d6f72c4f
@@ -49,7 +49,7 @@ def _decay_step_counter(begin=0):
     return global_step


-def noam_decay(d_model, warmup_steps):
+def noam_decay(d_model, warmup_steps, learning_rate=1.0):
     """

     Noam decay method. The numpy implementation of noam decay as follows.

@@ -58,11 +58,12 @@ def noam_decay(d_model, warmup_steps):
       import paddle.fluid as fluid
       import numpy as np
       # set hyper parameters
+      base_lr = 0.01
       d_model = 2
       current_steps = 20
       warmup_steps = 200
       # compute
-      lr_value = np.power(d_model, -0.5) * np.min([
+      lr_value = base_lr * np.power(d_model, -0.5) * np.min([
                               np.power(current_steps, -0.5),
                               np.power(warmup_steps, -1.5) * current_steps])

@@ -74,6 +75,10 @@ def noam_decay(d_model, warmup_steps):
         warmup_steps(Variable): A super parameter.
+        learning_rate(Variable|float|int): The initial learning rate. If the type
+            is Variable, it's a tensor with shape [1], the data type can be
+            float32 or float64. It also can be set to python int number. Default 1.0

     Returns:
         The decayed learning rate.

     Examples:

@@ -84,18 +89,21 @@ def noam_decay(d_model, warmup_steps):
           learning_rate = 0.01
           lr = fluid.layers.learning_rate_scheduler.noam_decay(
                          1/(warmup_steps *(learning_rate ** 2)),
-                         warmup_steps)
+                         warmup_steps,
+                         learning_rate)
    """
    with default_main_program()._lr_schedule_guard():
        if in_dygraph_mode():
-            decay = imperate_lr.NoamDecay(d_model, warmup_steps)
+            decay = imperate_lr.NoamDecay(
+                d_model, warmup_steps, learning_rate=learning_rate)
            return decay
        else:
            global_step = _decay_step_counter(1)

            a = global_step**-0.5
            b = (warmup_steps**-1.5) * global_step
-            lr_value = (d_model**-0.5) * nn.elementwise_min(a, b)
+            lr_value = learning_rate * (d_model**-0.5) * nn.elementwise_min(a,
+                                                                            b)

            return lr_value
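For the static-graph entry point, the docstring example above chooses d_model = 1/(warmup_steps * learning_rate**2), which makes the unscaled schedule peak at learning_rate when the step reaches warmup_steps; the new third argument then scales the whole curve. A minimal sketch following that same recipe (the values and the SGD optimizer are illustrative only, not part of the commit):

import paddle.fluid as fluid

warmup_steps = 100       # illustration values, as in the docstring example
learning_rate = 0.01
lr = fluid.layers.noam_decay(
    1 / (warmup_steps * (learning_rate**2)),  # d_model chosen so the unscaled peak equals learning_rate
    warmup_steps,
    learning_rate)
opt = fluid.optimizer.SGD(learning_rate=lr)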
python/paddle/fluid/tests/unittests/test_learning_rate_scheduler.py @ d6f72c4f
@@ -89,6 +89,34 @@ def cosine_decay(global_step, learning_rate, step_each_epoch, epochs):
     return decayed_lr


+def noam_decay(global_step, d_model, warmup_steps, learning_rate=1.0):
+    a = math.pow(global_step, -0.5)
+    b = math.pow(warmup_steps, -1.5) * global_step
+    decayed_lr = learning_rate * math.pow(d_model, -0.5) * min(a, b)
+
+    return decayed_lr
+
+
+class TestNoamLearningRateDecayDygraphMode(unittest.TestCase):
+    def test_dygraph_mode(self):
+        with fluid.dygraph.guard():
+            d_model = 0.01
+            warmup_steps = 200
+            learning_rate = 2.0
+            lr = fluid.layers.noam_decay(d_model, warmup_steps, learning_rate)
+            for step in range(5):
+                step += 1
+                right_result = noam_decay(step, d_model, warmup_steps,
+                                          learning_rate)
+                fluid_result = lr()
+
+                self.assertAlmostEqual(
+                    right_result,
+                    fluid_result[0],
+                    msg='Failed lr scheduler in step {0}, Python result is {1}, Fluid result is {2}'.
+                    format(step, right_result, fluid_result[0]))
+
+
 class TestLearningRateDecay(unittest.TestCase):
     def check_decay(self, python_decay_fn, fluid_decay_fn, kwargs):
         places = [fluid.CPUPlace()]

@@ -112,6 +140,9 @@ class TestLearningRateDecay(unittest.TestCase):
         exe.run(startup_prog)

         for step in range(10):
+            # Step of NoamDecay starts from 1.
+            if python_decay_fn.__name__ == 'noam_decay':
+                step += 1
             lr_val, = exe.run(main_prog, feed={}, fetch_list=[decayed_lr])

             python_decayed_lr = python_decay_fn(
                 global_step=float(step), **kwargs)

@@ -159,6 +190,11 @@ class TestLearningRateDecay(unittest.TestCase):
                 "step_each_epoch": 100,
                 "epochs": 120
             }),
+            (noam_decay, layers.noam_decay, {
+                "d_model": 0.01,
+                "warmup_steps": 200,
+                "learning_rate": 2.0
+            }),
         ]

         for py_decay_fn, fluid_decay_fn, kwargs in decay_fns:

@@ -195,6 +231,9 @@ class TestLinearWamrupLearningRateDecay(TestLearningRateDecay):
         exe.run(startup_prog)

         for step in range(20):
+            # Step of NoamDecay starts from 1.
+            if fluid_decay_fn.__name__ == 'noam_decay':
+                step += 1
             lr_val, = exe.run(main_prog, feed={}, fetch_list=[decayed_lr])

             if step < warmup_steps:
                 python_decayed_lr = linear_lr_warmup(
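The unit test compares the framework result against the Python reference above, step by step. As a quick sanity check (not part of the commit), the expected value at step 1 with the test's hyper-parameters can be worked out by hand:

import math

# Hyper-parameters used by TestNoamLearningRateDecayDygraphMode
d_model, warmup_steps, learning_rate, step = 0.01, 200, 2.0, 1

a = math.pow(step, -0.5)                     # 1.0
b = math.pow(warmup_steps, -1.5) * step      # ~3.5355e-04
expected = learning_rate * math.pow(d_model, -0.5) * min(a, b)
print(expected)                              # 2.0 * 10.0 * 3.5355e-04 ≈ 7.07e-03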