Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
175cf6e0
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 1 年 前同步成功
通知
2298
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
175cf6e0
编写于
2月 26, 2018
作者:
Y
Yu Yang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Add global_step in nn.py
上级
95ea54fd
变更
6
隐藏空白更改
内联
并排
Showing
6 changed file
with
101 addition
and
104 deletion
+101
-104
python/paddle/fluid/layer_helper.py
python/paddle/fluid/layer_helper.py
+19
-0
python/paddle/fluid/layers/nn.py
python/paddle/fluid/layers/nn.py
+23
-0
python/paddle/fluid/learning_rate_decay.py
python/paddle/fluid/learning_rate_decay.py
+43
-64
python/paddle/fluid/optimizer.py
python/paddle/fluid/optimizer.py
+1
-24
python/paddle/fluid/tests/book/test_label_semantic_roles.py
python/paddle/fluid/tests/book/test_label_semantic_roles.py
+1
-5
python/paddle/fluid/tests/unittests/test_learning_rate_decay.py
.../paddle/fluid/tests/unittests/test_learning_rate_decay.py
+14
-11
未找到文件。
python/paddle/fluid/layer_helper.py
浏览文件 @
175cf6e0
...
...
@@ -330,9 +330,28 @@ class LayerHelper(object):
return
self
.
main_program
.
current_block
().
create_var
(
*
args
,
**
kwargs
)
def
create_global_variable
(
self
,
persistable
=
False
,
*
args
,
**
kwargs
):
"""
create global variable, note that there is no initializer for this global variable.
Args:
persistable(bool): True if it is a checkpoint value.
*args: See create_var's documentation
**kwargs: See create_var's documentation
Returns(Variable): the created variable.
"""
return
self
.
main_program
.
global_block
().
create_var
(
*
args
,
persistable
=
persistable
,
**
kwargs
)
def
create_or_get_global_variable
(
self
,
name
,
*
args
,
**
kwargs
):
"""
Creates a global variable if not exists and returns the variable and
a boolean flag which is true when it is a new variable.
"""
if
self
.
main_program
.
global_block
().
has_var
(
name
):
return
self
.
main_program
.
global_block
().
var
(
name
),
False
else
:
return
self
.
create_global_variable
(
name
=
name
,
*
args
,
**
kwargs
),
True
def
set_variable_initializer
(
self
,
var
,
initializer
):
assert
isinstance
(
var
,
Variable
)
self
.
startup_program
.
global_block
().
create_var
(
...
...
python/paddle/fluid/layers/nn.py
浏览文件 @
175cf6e0
...
...
@@ -69,6 +69,7 @@ __all__ = [
'softmax_with_cross_entropy'
,
'smooth_l1'
,
'one_hot'
,
'global_step_counter'
,
]
...
...
@@ -3250,3 +3251,25 @@ def one_hot(input, depth):
attrs
=
{
'depth'
:
depth
},
outputs
=
{
'Out'
:
one_hot_out
})
return
one_hot_out
def
global_step_counter
():
"""
Return the run counter of the main program, which is started with 1.
Returns(Variable): The global run counter.
"""
helper
=
LayerHelper
(
'global_step_counter'
)
counter_name
=
'@STEP_COUNTER@'
counter
,
is_new_var
=
helper
.
create_or_get_global_variable
(
name
=
counter_name
,
dtype
=
'int64'
,
shape
=
[
1
],
persistable
=
True
)
if
is_new_var
:
helper
.
set_variable_initializer
(
counter
,
initializer
=
Constant
(
value
=
0
,
force_cpu
=
True
))
helper
.
main_program
.
global_block
().
prepend_op
(
type
=
'increment'
,
inputs
=
{
'X'
:
[
counter
]},
outputs
=
{
'Out'
:
[
counter
]})
counter
.
stop_gradient
=
True
return
counter
python/paddle/fluid/learning_rate_decay.py
浏览文件 @
175cf6e0
...
...
@@ -30,11 +30,14 @@ strategy according to this module.
"""
def
exponential_decay
(
learning_rate
,
global_step
,
decay_steps
,
decay_rate
,
staircase
=
False
):
def
float_global_step
():
# the first global step is zero in learning rate decay
global_step
=
layers
.
global_step_counter
()
-
1
global_step
=
layers
.
cast
(
global_step
,
'float32'
)
return
global_step
def
exponential_decay
(
learning_rate
,
decay_steps
,
decay_rate
,
staircase
=
False
):
"""Applies exponential decay to the learning rate.
```python
...
...
@@ -44,7 +47,6 @@ def exponential_decay(learning_rate,
Args:
learning_rate: A scalar float32 value or a Variable. This
will be the initial learning rate during training
global_step: A Variable that record the training step.
decay_steps: A Python `int32` number.
decay_rate: A Python `float` number.
staircase: Boolean. If set true, decay the learning rate every decay_steps.
...
...
@@ -52,8 +54,7 @@ def exponential_decay(learning_rate,
Returns:
The decayed learning rate
"""
if
not
isinstance
(
global_step
,
Variable
):
raise
ValueError
(
"global_step is required for exponential_decay."
)
global_step
=
float_global_step
()
with
init_on_cpu
():
# update learning_rate
...
...
@@ -65,23 +66,17 @@ def exponential_decay(learning_rate,
return
decayed_lr
def
natural_exp_decay
(
learning_rate
,
global_step
,
decay_steps
,
decay_rate
,
staircase
=
False
):
def
natural_exp_decay
(
learning_rate
,
decay_steps
,
decay_rate
,
staircase
=
False
):
"""Applies natural exponential decay to the initial learning rate.
```python
if not staircase:
decayed_learning_rate = learning_rate * exp(- decay_rate * (global_step / decay_steps))
else:
decayed_learning_rate = learning_rate * exp(- decay_rate * (global_step / decay_steps))
```
>>> if not staircase:
>>> decayed_learning_rate = learning_rate * exp(- decay_rate * (global_step / decay_steps))
>>> else:
>>> decayed_learning_rate = learning_rate * exp(- decay_rate * (global_step / decay_steps))
Args:
learning_rate: A scalar float32 value or a Variable. This
will be the initial learning rate during training
global_step: A Variable that record the training step.
decay_steps: A Python `int32` number.
decay_rate: A Python `float` number.
staircase: Boolean. If set true, decay the learning rate every decay_steps.
...
...
@@ -89,8 +84,7 @@ def natural_exp_decay(learning_rate,
Returns:
The decayed learning rate
"""
if
not
isinstance
(
global_step
,
Variable
):
raise
ValueError
(
"global_step is required for natural_exp_decay."
)
global_step
=
float_global_step
()
with
init_on_cpu
():
div_res
=
global_step
/
decay_steps
...
...
@@ -101,23 +95,17 @@ def natural_exp_decay(learning_rate,
return
decayed_lr
def
inverse_time_decay
(
learning_rate
,
global_step
,
decay_steps
,
decay_rate
,
staircase
=
False
):
def
inverse_time_decay
(
learning_rate
,
decay_steps
,
decay_rate
,
staircase
=
False
):
"""Applies inverse time decay to the initial learning rate.
```python
if staircase:
decayed_learning_rate = learning_rate / (1 + decay_rate * floor(global_step / decay_step))
else:
decayed_learning_rate = learning_rate / (1 + decay_rate * global_step / decay_step)
```
>>> if staircase:
>>> decayed_learning_rate = learning_rate / (1 + decay_rate * floor(global_step / decay_step))
>>> else:
>>> decayed_learning_rate = learning_rate / (1 + decay_rate * global_step / decay_step)
Args:
learning_rate: A scalar float32 value or a Variable. This
will be the initial learning rate during training
global_step: A Variable that record the training step.
will be the initial learning rate during training.
decay_steps: A Python `int32` number.
decay_rate: A Python `float` number.
staircase: Boolean. If set true, decay the learning rate every decay_steps.
...
...
@@ -125,8 +113,7 @@ def inverse_time_decay(learning_rate,
Returns:
The decayed learning rate
"""
if
not
isinstance
(
global_step
,
Variable
):
raise
ValueError
(
"global_step is required for inverse_time_decay."
)
global_step
=
float_global_step
()
with
init_on_cpu
():
div_res
=
global_step
/
decay_steps
...
...
@@ -139,26 +126,22 @@ def inverse_time_decay(learning_rate,
def
polynomial_decay
(
learning_rate
,
global_step
,
decay_steps
,
end_learning_rate
=
0.0001
,
power
=
1.0
,
cycle
=
False
):
"""Applies polynomial decay to the initial learning rate.
```python
if cycle:
decay_steps = decay_steps * ceil(global_step / decay_steps)
else:
global_step = min(global_step, decay_steps)
decayed_learning_rate = (learning_rate - end_learning_rate) *
(1 - global_step / decay_steps) ^ power +
end_learning_rate
```
>>> if cycle:
>>> decay_steps = decay_steps * ceil(global_step / decay_steps)
>>> else:
>>> global_step = min(global_step, decay_steps)
>>> decayed_learning_rate = (learning_rate - end_learning_rate) *
>>> (1 - global_step / decay_steps) ^ power +
>>> end_learning_rate
Args:
learning_rate: A scalar float32 value or a Variable. This
will be the initial learning rate during training
global_step: A Variable that record the training step.
decay_steps: A Python `int32` number.
end_learning_rate: A Python `float` number.
power: A Python `float` number
...
...
@@ -167,8 +150,7 @@ def polynomial_decay(learning_rate,
Returns:
The decayed learning rate
"""
if
not
isinstance
(
global_step
,
Variable
):
raise
ValueError
(
"global_step is required for inverse_time_decay."
)
global_step
=
float_global_step
()
with
init_on_cpu
():
if
cycle
:
...
...
@@ -193,27 +175,24 @@ def polynomial_decay(learning_rate,
return
decayed_lr
def
piecewise_decay
(
global_step
,
boundaries
,
values
):
def
piecewise_decay
(
boundaries
,
values
):
"""Applies piecewise decay to the initial learning rate.
```python
boundaries = [10000, 20000]
values = [1.0, 0.5, 0.1]
if step < 10000:
learning_rate = 1.0
elif step >= 10000 and step < 20000:
learning_rate = 0.5
else:
learning_rate = 0.1
```
>>> boundaries = [10000, 20000]
>>> values = [1.0, 0.5, 0.1]
>>>
>>> if step < 10000:
>>> learning_rate = 1.0
>>> elif 10000 <= step < 20000:
>>> learning_rate = 0.5
>>> else:
>>> learning_rate = 0.1
"""
if
len
(
values
)
-
len
(
boundaries
)
!=
1
:
raise
ValueError
(
"len(values) - len(boundaries) should be 1"
)
if
not
isinstance
(
global_step
,
Variable
):
raise
ValueError
(
"global_step is required for piecewise_decay."
)
global_step
=
float_global_step
()
with
init_on_cpu
():
lr
=
layers
.
create_global_var
(
...
...
python/paddle/fluid/optimizer.py
浏览文件 @
175cf6e0
...
...
@@ -35,9 +35,8 @@ class Optimizer(object):
but need to use one of it's implementation.
"""
def
__init__
(
self
,
learning_rate
,
global_step
=
None
,
regularization
=
None
):
def
__init__
(
self
,
learning_rate
,
regularization
=
None
):
assert
learning_rate
is
not
None
self
.
_global_step
=
global_step
self
.
regularization
=
regularization
self
.
_global_learning_rate
=
learning_rate
# Dictionary of accumulators. Some optimizer subclasses need to
...
...
@@ -144,26 +143,6 @@ class Optimizer(object):
format
(
name
,
param
.
name
))
return
self
.
_accumulators
[
name
][
param
.
name
]
def
_increment_global_step
(
self
,
block
):
"""Increment the global step by 1 after every iteration
Args:
block: the block in which the loss variable is present
Returns:
list with global_step increment op as its only element
"""
assert
isinstance
(
block
,
framework
.
Block
)
assert
self
.
_global_step
is
not
None
# create the increment op
increment_op
=
block
.
append_op
(
type
=
"increment"
,
inputs
=
{
"X"
:
self
.
_global_step
},
outputs
=
{
"Out"
:
self
.
_global_step
},
attrs
=
{
"step"
:
1.0
})
return
increment_op
def
create_optimization_pass
(
self
,
parameters_and_grads
,
loss
,
...
...
@@ -210,8 +189,6 @@ class Optimizer(object):
# FIXME: Need to fix this once we figure out how to handle dependencies
self
.
_finish_update
(
loss
.
block
)
if
self
.
_global_step
is
not
None
:
self
.
_increment_global_step
(
loss
.
block
)
end
=
len
(
global_block
.
ops
)
return
global_block
.
slice_ops
(
start
,
end
)
...
...
python/paddle/fluid/tests/book/test_label_semantic_roles.py
浏览文件 @
175cf6e0
...
...
@@ -168,16 +168,12 @@ def train(use_cuda, save_dirname=None):
# TODO(qiao)
# check other optimizers and check why out will be NAN
global_step
=
fluid
.
layers
.
create_global_var
(
shape
=
[
1
],
value
=
0
,
dtype
=
'float32'
,
force_cpu
=
True
,
persistable
=
True
)
sgd_optimizer
=
fluid
.
optimizer
.
SGD
(
learning_rate
=
fluid
.
learning_rate_decay
.
exponential_decay
(
learning_rate
=
0.0001
,
global_step
=
global_step
,
decay_steps
=
100000
,
decay_rate
=
0.5
,
staircase
=
True
),
global_step
=
global_step
)
staircase
=
True
))
sgd_optimizer
.
minimize
(
avg_cost
)
# TODO(qiao)
...
...
python/paddle/fluid/tests/unittests/test_learning_rate_decay.py
浏览文件 @
175cf6e0
...
...
@@ -28,7 +28,7 @@ def exponential_decay(learning_rate,
decay_steps
,
decay_rate
,
staircase
=
False
):
exponent
=
float
(
global_step
)
/
float
(
decay_steps
)
exponent
=
global_step
/
decay_steps
if
staircase
:
exponent
=
math
.
floor
(
exponent
)
return
learning_rate
*
decay_rate
**
exponent
...
...
@@ -83,22 +83,25 @@ def piecewise_decay(global_step, boundaries, values):
class
TestLearningRateDecay
(
unittest
.
TestCase
):
def
check_decay
(
self
,
python_decay_fn
,
fluid_decay_fn
,
kwargs
):
global_step
=
layers
.
create_global_var
(
shape
=
[
1
],
value
=
0.0
,
dtype
=
'float32'
,
persistable
=
True
)
decayed_lr
=
fluid_decay_fn
(
global_step
=
global_step
,
**
kwargs
)
layers
.
increment
(
global_step
,
1.0
)
decayed_lr
=
fluid_decay_fn
(
**
kwargs
)
place
=
fluid
.
CPUPlace
()
exe
=
fluid
.
Executor
(
place
)
exe
.
run
(
fluid
.
default_startup_program
())
for
step
in
range
(
10
):
step_val
,
lr_val
=
exe
.
run
(
fluid
.
default_main_program
(),
feed
=
[],
fetch_list
=
[
global_step
,
decayed_lr
])
python_decayed_lr
=
python_decay_fn
(
global_step
=
step
,
**
kwargs
)
self
.
assertAlmostEqual
(
python_decayed_lr
,
lr_val
[
0
])
step_val
,
lr_val
=
exe
.
run
(
fluid
.
default_main_program
(),
feed
=
[],
fetch_list
=
[
fluid
.
layers
.
global_step_counter
(),
decayed_lr
])
python_decayed_lr
=
python_decay_fn
(
global_step
=
float
(
step
),
**
kwargs
)
self
.
assertAlmostEqual
(
python_decayed_lr
,
lr_val
[
0
],
msg
=
'Failed fn is {0}, Python result is {1}, Fluid result is {2}'
.
format
(
python_decay_fn
.
__name__
,
str
(
python_decayed_lr
),
str
(
lr_val
[
0
])))
def
test_decay
(
self
):
common_kwargs_true
=
{
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录