Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
0ca62744
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
0ca62744
编写于
12月 11, 2017
作者:
D
dzhwinter
提交者:
GitHub
12月 11, 2017
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
"add global regularization" (#6443)
* "add global regularization" * Polish `append_regularization_ops`
上级
5926e9a2
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
27 addition
and
26 deletion
+27
-26
python/paddle/v2/fluid/optimizer.py
python/paddle/v2/fluid/optimizer.py
+16
-22
python/paddle/v2/fluid/regularizer.py
python/paddle/v2/fluid/regularizer.py
+11
-4
未找到文件。
python/paddle/v2/fluid/optimizer.py
浏览文件 @
0ca62744
...
...
@@ -18,8 +18,9 @@ class Optimizer(object):
but need to use one of its implementations.
"""
def
__init__
(
self
,
global_step
=
None
):
def
__init__
(
self
,
global_step
=
None
,
regularization
=
None
):
self
.
_global_step
=
global_step
self
.
regularization
=
regularization
# Dictionary of accumulators. Some optimizer subclasses need to
# allocate and manage extra variables associated with the parameters
# to train. These variables are called accumulators.
...
...
@@ -199,7 +200,8 @@ class Optimizer(object):
"""
params_grads
=
append_backward_ops
(
loss
,
parameter_list
,
no_grad_set
)
# Add regularization if any
params_grads
=
append_regularization_ops
(
params_grads
)
params_grads
=
append_regularization_ops
(
params_grads
,
self
.
regularization
)
optimize_ops
=
self
.
create_optimization_pass
(
params_grads
,
loss
,
startup_program
)
return
optimize_ops
...
...
@@ -209,9 +211,9 @@ class SGDOptimizer(Optimizer):
""" Simple SGD optimizer without any state.
"""
def
__init__
(
self
,
learning_rate
,
global_step
=
None
):
def
__init__
(
self
,
learning_rate
,
**
kwargs
):
assert
learning_rate
is
not
None
super
(
SGDOptimizer
,
self
).
__init__
(
global_step
)
super
(
SGDOptimizer
,
self
).
__init__
(
**
kwargs
)
self
.
type
=
"sgd"
self
.
_learning_rate
=
learning_rate
...
...
@@ -236,14 +238,10 @@ class MomentumOptimizer(Optimizer):
"""
_velocity_acc_str
=
"velocity"
def
__init__
(
self
,
learning_rate
,
momentum
,
use_nesterov
=
False
,
global_step
=
None
):
def
__init__
(
self
,
learning_rate
,
momentum
,
use_nesterov
=
False
,
**
kwargs
):
assert
learning_rate
is
not
None
assert
momentum
is
not
None
super
(
MomentumOptimizer
,
self
).
__init__
(
global_step
)
super
(
MomentumOptimizer
,
self
).
__init__
(
**
kwargs
)
self
.
type
=
"momentum"
self
.
_learning_rate
=
learning_rate
self
.
_momentum
=
momentum
...
...
@@ -284,10 +282,10 @@ class AdagradOptimizer(Optimizer):
"""
_moment_acc_str
=
"moment"
def
__init__
(
self
,
learning_rate
,
epsilon
=
1.0e-6
,
global_step
=
None
):
def
__init__
(
self
,
learning_rate
,
epsilon
=
1.0e-6
,
**
kwargs
):
assert
learning_rate
is
not
None
assert
epsilon
is
not
None
super
(
AdagradOptimizer
,
self
).
__init__
(
global_step
)
super
(
AdagradOptimizer
,
self
).
__init__
(
**
kwargs
)
self
.
type
=
"adagrad"
self
.
_learning_rate
=
learning_rate
self
.
_epsilon
=
epsilon
...
...
@@ -331,12 +329,12 @@ class AdamOptimizer(Optimizer):
beta1
=
0.9
,
beta2
=
0.999
,
epsilon
=
1e-8
,
global_step
=
None
):
**
kwargs
):
assert
learning_rate
is
not
None
assert
beta1
is
not
None
assert
beta2
is
not
None
assert
epsilon
is
not
None
super
(
AdamOptimizer
,
self
).
__init__
(
global_step
)
super
(
AdamOptimizer
,
self
).
__init__
(
**
kwargs
)
self
.
type
=
"adam"
self
.
_learning_rate
=
learning_rate
self
.
_beta1
=
beta1
...
...
@@ -436,12 +434,12 @@ class AdamaxOptimizer(Optimizer):
beta1
=
0.9
,
beta2
=
0.999
,
epsilon
=
1e-8
,
global_step
=
None
):
**
kwargs
):
assert
learning_rate
is
not
None
assert
beta1
is
not
None
assert
beta2
is
not
None
assert
epsilon
is
not
None
super
(
AdamaxOptimizer
,
self
).
__init__
()
super
(
AdamaxOptimizer
,
self
).
__init__
(
**
kwargs
)
self
.
type
=
"adamax"
self
.
_learning_rate
=
learning_rate
self
.
_beta1
=
beta1
...
...
@@ -514,16 +512,12 @@ class DecayedAdagradOptimizer(Optimizer):
"""
_moment_acc_str
=
"moment"
def
__init__
(
self
,
learning_rate
,
decay
=
0.95
,
epsilon
=
1.0e-6
,
global_step
=
None
):
def
__init__
(
self
,
learning_rate
,
decay
=
0.95
,
epsilon
=
1.0e-6
,
**
kwargs
):
assert
learning_rate
is
not
None
assert
decay
is
not
None
assert
epsilon
is
not
None
super
(
DecayedAdagradOptimizer
,
self
).
__init__
(
global_step
)
super
(
DecayedAdagradOptimizer
,
self
).
__init__
(
**
kwargs
)
self
.
type
=
"decayed_adagrad"
self
.
_learning_rate
=
learning_rate
self
.
_decay
=
decay
...
...
python/paddle/v2/fluid/regularizer.py
浏览文件 @
0ca62744
...
...
@@ -3,7 +3,7 @@ import framework
__all__
=
[
'append_regularization_ops'
,
'L1Decay'
,
'L2Decay'
]
def
append_regularization_ops
(
parameters_and_grads
):
def
append_regularization_ops
(
parameters_and_grads
,
regularization
=
None
):
"""Create and add backward regularization Operators
Creates and adds backward regularization operators in the BlockDesc.
...
...
@@ -14,6 +14,8 @@ def append_regularization_ops(parameters_and_grads):
Args:
parameters_and_grads: A list of (parameters, gradients) pairs
that need to be regularized.
regularization: A global regularizer. If a parameter does not have its
own regularizer set, this global regularizer is applied to it.
Returns:
list of (parameters, gradients) pair with the regularized gradient
...
...
@@ -23,14 +25,19 @@ def append_regularization_ops(parameters_and_grads):
"""
params_and_grads
=
[]
for
param
,
grad
in
parameters_and_grads
:
regularization_term
=
None
if
param
.
regularizer
is
not
None
:
# Add variable for regularization term in grad block
regularization_term
=
param
.
regularizer
(
param
,
grad
.
block
)
elif
regularization
is
not
None
:
regularization_term
=
regularization
(
param
,
grad
.
block
)
# If no gradient or no regularization specified,
# then we don't need to do anything
if
grad
is
None
or
param
.
regularizer
is
None
:
if
grad
is
None
or
regularization_term
is
None
:
params_and_grads
.
append
((
param
,
grad
))
continue
# Add variable for regularization term in grad block
regularization_term
=
param
.
regularizer
(
param
,
grad
.
block
)
assert
grad
.
shape
==
regularization_term
.
shape
grad
.
block
.
append_op
(
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录