Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
0ca62744
P
Paddle
项目概览
PaddlePaddle
/
Paddle
1 年多 前同步成功
通知
2302
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
0ca62744
编写于
12月 11, 2017
作者:
D
dzhwinter
提交者:
GitHub
12月 11, 2017
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
"add global regularization" (#6443)
* "add global regularization" * Polish `append_regularization_ops`
上级
5926e9a2
变更
2
隐藏空白更改
内联
并排
Showing
2 changed files
with
27 additions
and
26 deletions
+27
-26
python/paddle/v2/fluid/optimizer.py
python/paddle/v2/fluid/optimizer.py
+16
-22
python/paddle/v2/fluid/regularizer.py
python/paddle/v2/fluid/regularizer.py
+11
-4
未找到文件。
python/paddle/v2/fluid/optimizer.py
浏览文件 @
0ca62744
...
...
@@ -18,8 +18,9 @@ class Optimizer(object):
but need to use one of its implementations.
"""
def
__init__
(
self
,
global_step
=
None
):
def
__init__
(
self
,
global_step
=
None
,
regularization
=
None
):
self
.
_global_step
=
global_step
self
.
regularization
=
regularization
# Dictionary of accumulators. Some optimizer subclasses need to
# allocate and manage extra variables associated with the parameters
# to train. These variables are called accumulators.
...
...
@@ -199,7 +200,8 @@ class Optimizer(object):
"""
params_grads
=
append_backward_ops
(
loss
,
parameter_list
,
no_grad_set
)
# Add regularization if any
params_grads
=
append_regularization_ops
(
params_grads
)
params_grads
=
append_regularization_ops
(
params_grads
,
self
.
regularization
)
optimize_ops
=
self
.
create_optimization_pass
(
params_grads
,
loss
,
startup_program
)
return
optimize_ops
...
...
@@ -209,9 +211,9 @@ class SGDOptimizer(Optimizer):
""" Simple SGD optimizer without any state.
"""
def
__init__
(
self
,
learning_rate
,
global_step
=
None
):
def
__init__
(
self
,
learning_rate
,
**
kwargs
):
assert
learning_rate
is
not
None
super
(
SGDOptimizer
,
self
).
__init__
(
global_step
)
super
(
SGDOptimizer
,
self
).
__init__
(
**
kwargs
)
self
.
type
=
"sgd"
self
.
_learning_rate
=
learning_rate
...
...
@@ -236,14 +238,10 @@ class MomentumOptimizer(Optimizer):
"""
_velocity_acc_str
=
"velocity"
def
__init__
(
self
,
learning_rate
,
momentum
,
use_nesterov
=
False
,
global_step
=
None
):
def
__init__
(
self
,
learning_rate
,
momentum
,
use_nesterov
=
False
,
**
kwargs
):
assert
learning_rate
is
not
None
assert
momentum
is
not
None
super
(
MomentumOptimizer
,
self
).
__init__
(
global_step
)
super
(
MomentumOptimizer
,
self
).
__init__
(
**
kwargs
)
self
.
type
=
"momentum"
self
.
_learning_rate
=
learning_rate
self
.
_momentum
=
momentum
...
...
@@ -284,10 +282,10 @@ class AdagradOptimizer(Optimizer):
"""
_moment_acc_str
=
"moment"
def
__init__
(
self
,
learning_rate
,
epsilon
=
1.0e-6
,
global_step
=
None
):
def
__init__
(
self
,
learning_rate
,
epsilon
=
1.0e-6
,
**
kwargs
):
assert
learning_rate
is
not
None
assert
epsilon
is
not
None
super
(
AdagradOptimizer
,
self
).
__init__
(
global_step
)
super
(
AdagradOptimizer
,
self
).
__init__
(
**
kwargs
)
self
.
type
=
"adagrad"
self
.
_learning_rate
=
learning_rate
self
.
_epsilon
=
epsilon
...
...
@@ -331,12 +329,12 @@ class AdamOptimizer(Optimizer):
beta1
=
0.9
,
beta2
=
0.999
,
epsilon
=
1e-8
,
global_step
=
None
):
**
kwargs
):
assert
learning_rate
is
not
None
assert
beta1
is
not
None
assert
beta2
is
not
None
assert
epsilon
is
not
None
super
(
AdamOptimizer
,
self
).
__init__
(
global_step
)
super
(
AdamOptimizer
,
self
).
__init__
(
**
kwargs
)
self
.
type
=
"adam"
self
.
_learning_rate
=
learning_rate
self
.
_beta1
=
beta1
...
...
@@ -436,12 +434,12 @@ class AdamaxOptimizer(Optimizer):
beta1
=
0.9
,
beta2
=
0.999
,
epsilon
=
1e-8
,
global_step
=
None
):
**
kwargs
):
assert
learning_rate
is
not
None
assert
beta1
is
not
None
assert
beta2
is
not
None
assert
epsilon
is
not
None
super
(
AdamaxOptimizer
,
self
).
__init__
()
super
(
AdamaxOptimizer
,
self
).
__init__
(
**
kwargs
)
self
.
type
=
"adamax"
self
.
_learning_rate
=
learning_rate
self
.
_beta1
=
beta1
...
...
@@ -514,16 +512,12 @@ class DecayedAdagradOptimizer(Optimizer):
"""
_moment_acc_str
=
"moment"
def
__init__
(
self
,
learning_rate
,
decay
=
0.95
,
epsilon
=
1.0e-6
,
global_step
=
None
):
def
__init__
(
self
,
learning_rate
,
decay
=
0.95
,
epsilon
=
1.0e-6
,
**
kwargs
):
assert
learning_rate
is
not
None
assert
decay
is
not
None
assert
epsilon
is
not
None
super
(
DecayedAdagradOptimizer
,
self
).
__init__
(
global_step
)
super
(
DecayedAdagradOptimizer
,
self
).
__init__
(
**
kwargs
)
self
.
type
=
"decayed_adagrad"
self
.
_learning_rate
=
learning_rate
self
.
_decay
=
decay
...
...
python/paddle/v2/fluid/regularizer.py
浏览文件 @
0ca62744
...
...
@@ -3,7 +3,7 @@ import framework
__all__
=
[
'append_regularization_ops'
,
'L1Decay'
,
'L2Decay'
]
def
append_regularization_ops
(
parameters_and_grads
):
def
append_regularization_ops
(
parameters_and_grads
,
regularization
=
None
):
"""Create and add backward regularization Operators
Creates and adds backward regularization operators in the BlockDesc.
...
...
@@ -14,6 +14,8 @@ def append_regularization_ops(parameters_and_grads):
Args:
parameters_and_grads: A list of (parameters, gradients) pairs
that need to be regularized.
regularization: A global regularizer. It is applied to every parameter
that does not have its own regularizer set.
Returns:
list of (parameters, gradients) pair with the regularized gradient
...
...
@@ -23,14 +25,19 @@ def append_regularization_ops(parameters_and_grads):
"""
params_and_grads
=
[]
for
param
,
grad
in
parameters_and_grads
:
regularization_term
=
None
if
param
.
regularizer
is
not
None
:
# Add variable for regularization term in grad block
regularization_term
=
param
.
regularizer
(
param
,
grad
.
block
)
elif
regularization
is
not
None
:
regularization_term
=
regularization
(
param
,
grad
.
block
)
# If no gradient or no regularization specified,
# then we don't need to do anything
if
grad
is
None
or
param
.
regularizer
is
None
:
if
grad
is
None
or
regularization_term
is
None
:
params_and_grads
.
append
((
param
,
grad
))
continue
# Add variable for regularization term in grad block
regularization_term
=
param
.
regularizer
(
param
,
grad
.
block
)
assert
grad
.
shape
==
regularization_term
.
shape
grad
.
block
.
append_op
(
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录