Unverified commit 8b587320
Authored Apr 11, 2019 by Wu Yi; committed by GitHub on Apr 11, 2019

remove append_LARS not used api test=develop (#16703)

Parent: f96446ca
Showing 2 changed files with 2 additions and 47 deletions (+2, -47)
paddle/fluid/API.spec (+0, -1)
python/paddle/fluid/layers/learning_rate_scheduler.py (+2, -46)
paddle/fluid/API.spec @ 8b587320
@@ -361,7 +361,6 @@ paddle.fluid.layers.inverse_time_decay (ArgSpec(args=['learning_rate', 'decay_st
 paddle.fluid.layers.polynomial_decay (ArgSpec(args=['learning_rate', 'decay_steps', 'end_learning_rate', 'power', 'cycle'], varargs=None, keywords=None, defaults=(0.0001, 1.0, False)), ('document', '882634f420f626642f0874481263da40'))
 paddle.fluid.layers.piecewise_decay (ArgSpec(args=['boundaries', 'values'], varargs=None, keywords=None, defaults=None), ('document', 'c717d9d1d78a53c809d01b8bc56f3cae'))
 paddle.fluid.layers.noam_decay (ArgSpec(args=['d_model', 'warmup_steps'], varargs=None, keywords=None, defaults=None), ('document', 'd9a95746353fd574be36dc28d8726c28'))
-paddle.fluid.layers.append_LARS (ArgSpec(args=['params_grads', 'learning_rate', 'weight_decay'], varargs=None, keywords=None, defaults=None), ('document', 'd24fa1e7d62ac8a534fc6a86002f84f8'))
 paddle.fluid.layers.cosine_decay (ArgSpec(args=['learning_rate', 'step_each_epoch', 'epochs'], varargs=None, keywords=None, defaults=None), ('document', '9588c64c26ffaef3c466e404a6af9d9b'))
 paddle.fluid.layers.linear_lr_warmup (ArgSpec(args=['learning_rate', 'warmup_steps', 'start_lr', 'end_lr'], varargs=None, keywords=None, defaults=None), ('document', '2ef3f5ca5cd71ea4217c418e5a7a0565'))
 paddle.fluid.contrib.InitState.__init__ (ArgSpec(args=['self', 'init', 'shape', 'value', 'init_boot', 'need_reorder', 'dtype'], varargs=None, keywords=None, defaults=(None, None, 0.0, None, False, 'float32')), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
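Each API.spec entry pairs a public API name with its argument signature and what appears to be a hash of its docstring, so deleting this line also drops append_LARS from the tracked public surface. Below is a rough, hypothetical sketch of how such a line could be reproduced with the standard inspect and hashlib modules; Paddle's real spec generator may differ in detail, and the stand-in function here exists only to demonstrate the format.

import hashlib
import inspect

def api_spec_line(qualified_name, func):
    # Rebuild an API.spec-style entry: "<name> (ArgSpec(...), ('document', '<md5>'))".
    # Illustrative guess at the format seen above, not Paddle's actual tooling.
    spec = inspect.getfullargspec(func)
    argspec = "ArgSpec(args=%r, varargs=%r, keywords=%r, defaults=%r)" % (
        spec.args, spec.varargs, spec.varkw, spec.defaults)
    doc_hash = hashlib.md5((func.__doc__ or "").encode("utf-8")).hexdigest()
    return "%s (%s, ('document', '%s'))" % (qualified_name, argspec, doc_hash)

def append_LARS(params_grads, learning_rate, weight_decay):
    """Stand-in for the removed function, used only to demo the spec format."""

print(api_spec_line("paddle.fluid.layers.append_LARS", append_LARS))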
python/paddle/fluid/layers/learning_rate_scheduler.py @ 8b587320
@@ -35,8 +35,8 @@ from ..dygraph import learning_rate_scheduler as imperate_lr
 __all__ = [
     'exponential_decay', 'natural_exp_decay', 'inverse_time_decay',
-    'polynomial_decay', 'piecewise_decay', 'noam_decay', 'append_LARS',
-    'cosine_decay', 'linear_lr_warmup'
+    'polynomial_decay', 'piecewise_decay', 'noam_decay', 'cosine_decay',
+    'linear_lr_warmup'
 ]
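Dropping 'append_LARS' from __all__ removes it from the module's wildcard exports; the function body itself is deleted in the next hunk. A tiny, self-contained illustration of what __all__ controls (the module and names below are hypothetical, not Paddle code):

# scheduler_demo.py -- hypothetical module
__all__ = ['cosine_decay', 'linear_lr_warmup']   # 'append_LARS' no longer listed

def cosine_decay():
    return 'cosine'

def linear_lr_warmup():
    return 'warmup'

# In another file:
#   from scheduler_demo import *
#   cosine_decay()    # works: listed in __all__
#   append_LARS()     # NameError: not exported (and removed outright by this commit)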
@@ -381,50 +381,6 @@ def cosine_decay(learning_rate, step_each_epoch, epochs):
     return decayed_lr


-def append_LARS(params_grads, learning_rate, weight_decay):
-    """
-    Applies LARS (LAYER-WISE ADAPTIVE RATE SCALING) to learning rate for
-    each layer.
-
-    Args:
-        learning_rate: A learning rate Variable. This
-          is the global learning rate for LARS.
-        weight_decay: A Python `float` number.
-
-    Returns:
-        The decayed learning rate
-
-    Examples:
-        .. code-block:: python
-
-            learning_rate *= local_gw_ratio * sqrt(sumsq(param))
-                / (sqrt(sumsq(gradient))+ weight_decay * sqrt(sumsq(param)))
-    """
-    assert not imperative_base.enabled(
-    ), "append_LARS is NOT supported in dygraph mode now"
-
-    def _balanced_weight(param_norm, grad_norm):
-        if weight_decay == 1.0:
-            return grad_norm + param_norm
-        else:
-            return grad_norm + weight_decay * param_norm
-
-    for param, grad in params_grads:
-        with param.block.program.optimized_guard(
-                [param, grad]), name_scope("optimizer"):
-            param_lr = param.optimize_attr['learning_rate']
-            param_norm = ops.sqrt(nn.reduce_sum(input=ops.square(param)))
-            grad_norm = ops.sqrt(nn.reduce_sum(input=ops.square(grad)))
-            if type(param_lr) == float and param_lr == 1.0:
-                decayed_lr = learning_rate * param_norm \
-                    / _balanced_weight(param_norm, grad_norm)
-            else:
-                decayed_lr = learning_rate * param_lr * param_norm \
-                    / _balanced_weight(param_norm, grad_norm)
-            # set back param local learning rate
-            param.optimize_attr['learning_rate'] = decayed_lr
-
-
 def linear_lr_warmup(learning_rate, warmup_steps, start_lr, end_lr):
     """
     Applies linear learning rate warmup before the normal learning rate
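For reference, the removed docstring states the LARS rule: each layer's learning rate is the global rate scaled by ||param|| / (||grad|| + weight_decay * ||param||). Below is a minimal NumPy sketch of that formula evaluated eagerly, outside any Paddle program; the local_gw_ratio factor, array shapes, and example values are assumptions for illustration, and this is not a drop-in replacement for the deleted API.

import numpy as np

def lars_scaled_lr(learning_rate, param, grad, weight_decay, local_gw_ratio=1.0):
    # Layer-wise adaptive rate scaling, following the formula in the removed
    # append_LARS docstring (illustrative only, not Paddle's API):
    #   lr *= local_gw_ratio * sqrt(sumsq(param))
    #         / (sqrt(sumsq(grad)) + weight_decay * sqrt(sumsq(param)))
    param_norm = np.sqrt(np.sum(np.square(param)))
    grad_norm = np.sqrt(np.sum(np.square(grad)))
    return learning_rate * local_gw_ratio * param_norm / (
        grad_norm + weight_decay * param_norm)

# Hypothetical layer: large weights with comparatively small gradients,
# so the layer-wise rate ends up larger than the global rate of 0.1.
param = np.full((128, 256), 0.05)
grad = np.full((128, 256), 0.001)
print(lars_scaled_lr(0.1, param, grad, weight_decay=0.0005))

In the deleted code the same two norms were built as graph ops via ops.sqrt(nn.reduce_sum(input=ops.square(...))), and the scaled rate was written back into each parameter's optimize_attr['learning_rate'].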