未验证 提交 5cc2f0bd 编写于 作者: Q Qiao Longfei 提交者: GitHub

Add polynomial_decay and piecewise_decay (#8013)

* init polynomial_decay

* test polynomial_decay

* complete polynomial_decay

* fix conditional block op

* init scalar-switch-case-op

* switch op can compile

* complete forward switch_op

* add GetMatchCaseIndex

* add switch_grad_op

* init switch Python API

* add test_switch

* support set block list in python

* fix scope problem

* complete test

* optimize test

* optimize test

* rm backward part

* clear grad op

* polynomial_decay use switch op

* revert conditional_block_op and reshape_op

* add piecewise_decay and test

* fix piecewise_decay

* try to use condition op for switch

* can work

* clean old code

* revert

* rm switch_op.cc

* optimize code

* add attr is_scalar_condition for condition_block_op

* fix comment

* fix comment

* add export
上级 6612068e
...@@ -38,6 +38,7 @@ __all__ = [ ...@@ -38,6 +38,7 @@ __all__ = [
'array_write', 'array_write',
'create_array', 'create_array',
'less_than', 'less_than',
'equal',
'array_read', 'array_read',
'shrink_memory', 'shrink_memory',
'array_length', 'array_length',
...@@ -975,6 +976,36 @@ def less_than(x, y, cond=None, **ignored): ...@@ -975,6 +976,36 @@ def less_than(x, y, cond=None, **ignored):
return cond return cond
def equal(x, y, cond=None, **ignored):
    """
    **equal**

    Append an ``equal`` operator that compares :math:`x == y` elementwise.

    Args:
        x(Variable): First operand of *equal*
        y(Variable): Second operand of *equal*
        cond(Variable|None): Optional output variable to store the result of
            *equal*. When it is None, a temporary variable with dtype
            ``'bool'`` is created and its ``stop_gradient`` is set to True.

    Returns:
        Variable: The tensor variable storing the output of *equal*.

    Examples:
        .. code-block:: python

          is_equal = fluid.layers.equal(x=label, y=limit)
    """
    # Keep this as the first statement: locals() must contain only the
    # call arguments when they are forwarded to the helper.
    helper = LayerHelper("equal", **locals())

    if cond is None:
        # No output supplied by the caller: allocate a boolean result holder.
        cond = helper.create_tmp_variable(dtype='bool')
        cond.stop_gradient = True

    op_inputs = {'X': [x], 'Y': [y]}
    op_outputs = {'Out': [cond]}
    helper.append_op(type='equal', inputs=op_inputs, outputs=op_outputs)
    return cond
def array_read(array, i): def array_read(array, i):
"""This function performs the operation to read the data in as an """This function performs the operation to read the data in as an
LOD_TENSOR_ARRAY. LOD_TENSOR_ARRAY.
......
...@@ -15,7 +15,10 @@ ...@@ -15,7 +15,10 @@
import layers import layers
from framework import Variable from framework import Variable
__all__ = ['exponential_decay', 'natural_exp_decay', 'inverse_time_decay'] __all__ = [
'exponential_decay', 'natural_exp_decay', 'inverse_time_decay',
'polynomial_decay', 'piecewise_decay'
]
""" """
When training a model, it's often useful to decay the When training a model, it's often useful to decay the
learning rate during training process, this is called learning rate during training process, this is called
...@@ -101,7 +104,7 @@ def inverse_time_decay(learning_rate, ...@@ -101,7 +104,7 @@ def inverse_time_decay(learning_rate,
```python ```python
if staircase: if staircase:
decayed_learning_rate = learning_rate / (1 + decay_rate * floor(global_step / decay_step)) decayed_learning_rate = learning_rate / (1 + decay_rate * floor(global_step / decay_step))
else else:
decayed_learning_rate = learning_rate / (1 + decay_rate * global_step / decay_step) decayed_learning_rate = learning_rate / (1 + decay_rate * global_step / decay_step)
``` ```
Args: Args:
...@@ -123,3 +126,98 @@ def inverse_time_decay(learning_rate, ...@@ -123,3 +126,98 @@ def inverse_time_decay(learning_rate,
div_res = layers.floor(x=div_res) div_res = layers.floor(x=div_res)
return learning_rate / (1 + decay_rate * div_res) return learning_rate / (1 + decay_rate * div_res)
def polynomial_decay(learning_rate,
                     global_step,
                     decay_steps,
                     end_learning_rate=0.0001,
                     power=1.0,
                     cycle=False):
    """Applies polynomial decay to the initial learning rate.

    ```python
    if cycle:
        # the multiplier is forced to 1 while global_step == 0
        decay_steps = decay_steps * max(ceil(global_step / decay_steps), 1)
    else:
        global_step = min(global_step, decay_steps)
    decayed_learning_rate = (learning_rate - end_learning_rate) * \
        (1 - global_step / decay_steps) ** power + end_learning_rate
    ```

    Args:
        learning_rate: A scalar float32 value or a Variable. This
            will be the initial learning rate during training
        global_step: A Variable that record the training step.
        decay_steps: A Python `int32` number.
        end_learning_rate: A Python `float` number.
        power: A Python `float` number
        cycle: Boolean. If set true, decay the learning rate every decay_steps.

    Returns:
        The decayed learning rate

    Raises:
        ValueError: If ``global_step`` is not a Variable.
    """
    if not isinstance(global_step, Variable):
        # The message previously named inverse_time_decay (copy-paste slip).
        raise ValueError("global_step is required for polynomial_decay.")

    if cycle:
        div_res = layers.ceil(x=(global_step / decay_steps))
        zero_var = layers.fill_constant(shape=[1], dtype='float32', value=0.0)
        one_var = layers.fill_constant(shape=[1], dtype='float32', value=1.0)

        # At step 0 the ceil above yields 0, which would make the scaled
        # decay_steps zero; overwrite the multiplier with 1 in that case.
        with layers.Switch() as switch:
            with switch.case(layers.equal(x=global_step, y=zero_var)):
                layers.assign(input=one_var, output=div_res)
        decay_steps = decay_steps * div_res
    else:
        # Clamp the step so the rate stays at end_learning_rate once
        # decay_steps has been reached.
        decay_steps_var = layers.fill_constant(
            shape=[1], dtype='float32', value=float(decay_steps))
        global_step = layers.elementwise_min(x=global_step, y=decay_steps_var)

    return (learning_rate - end_learning_rate) * \
        ((1 - global_step / decay_steps) ** power) + end_learning_rate
def piecewise_decay(global_step, boundaries, values):
    """Applies piecewise decay to the initial learning rate.

    ```python
    boundaries = [10000, 20000]
    values = [1.0, 0.5, 0.1]

    if step < 10000:
        learning_rate = 1.0
    elif step >= 10000 and step < 20000:
        learning_rate = 0.5
    else:
        learning_rate = 0.1
    ```

    Args:
        global_step: A Variable that records the training step.
        boundaries: A sequence of step thresholds; each one is converted
            with ``float()`` and compared against ``global_step``.
        values: A sequence of learning rates holding exactly one more
            element than ``boundaries``; ``values[i]`` is selected by the
            case ``global_step < boundaries[i]`` and the last element is
            assigned in the default case.

    Returns:
        The global Variable named "learning_rate" that receives the
        selected value.

    Raises:
        ValueError: If ``len(values) - len(boundaries) != 1`` or if
            ``global_step`` is not a Variable.
    """
    if len(values) - len(boundaries) != 1:
        raise ValueError("len(values) - len(boundaries) should be 1")

    if not isinstance(global_step, Variable):
        raise ValueError("global_step is required for piecewise_decay.")

    lr = layers.create_global_var(
        shape=[1],
        value=0.0,
        dtype='float32',
        persistable=True,
        name="learning_rate")

    with layers.Switch() as switch:
        # zip stops after the last boundary, so the extra trailing value is
        # left for the default case below.
        for boundary, value in zip(boundaries, values):
            boundary_val = layers.fill_constant(
                shape=[1], dtype='float32', value=float(boundary))
            value_var = layers.fill_constant(
                shape=[1], dtype='float32', value=float(value))
            with switch.case(layers.less_than(global_step, boundary_val)):
                layers.assign(value_var, lr)

        last_value_var = layers.fill_constant(
            shape=[1], dtype='float32', value=float(values[-1]))
        with switch.default():
            layers.assign(last_value_var, lr)

    return lr
...@@ -15,6 +15,8 @@ ...@@ -15,6 +15,8 @@
import unittest import unittest
import math import math
import copy
import paddle.v2.fluid.framework as framework import paddle.v2.fluid.framework as framework
import paddle.v2.fluid as fluid import paddle.v2.fluid as fluid
import paddle.v2.fluid.layers as layers import paddle.v2.fluid.layers as layers
...@@ -54,21 +56,37 @@ def inverse_time_decay(learning_rate, ...@@ -54,21 +56,37 @@ def inverse_time_decay(learning_rate,
return learning_rate / (1 + decay_rate * temp) return learning_rate / (1 + decay_rate * temp)
class TestLearningRateDecay(unittest.TestCase): def polynomial_decay(learning_rate,
def check_decay(self, python_decay_fn, fluid_decay_fn, staircase): global_step,
init_lr = 1.0 decay_steps,
decay_steps = 5 end_learning_rate=0.0001,
decay_rate = 0.5 power=1.0,
cycle=False):
if cycle:
div = math.ceil(global_step / float(decay_steps))
if div == 0:
div = 1
decay_steps = decay_steps * div
else:
global_step = min(global_step, decay_steps)
return (learning_rate - end_learning_rate) * \
((1 - float(global_step) / float(decay_steps)) ** power) + end_learning_rate
def piecewise_decay(global_step, boundaries, values):
    """Pure-Python reference for piecewise learning-rate decay.

    Args:
        global_step: The current step, a plain number.
        boundaries: Step thresholds, checked in the order given.
        values: Learning rates; must hold one more element than
            ``boundaries``.

    Returns:
        ``values[i]`` for the first ``i`` with
        ``global_step < boundaries[i]``, otherwise the last element of
        ``values``.
    """
    assert len(boundaries) + 1 == len(values)
    # zip truncates at the shorter sequence, so the trailing value is only
    # reachable through the fall-through return.
    for boundary, value in zip(boundaries, values):
        if global_step < boundary:
            return value
    return values[-1]
class TestLearningRateDecay(unittest.TestCase):
def check_decay(self, python_decay_fn, fluid_decay_fn, kwargs):
global_step = layers.create_global_var( global_step = layers.create_global_var(
shape=[1], value=0.0, dtype='float32', persistable=True) shape=[1], value=0.0, dtype='float32', persistable=True)
decayed_lr = fluid_decay_fn( decayed_lr = fluid_decay_fn(global_step=global_step, **kwargs)
learning_rate=init_lr,
global_step=global_step,
decay_steps=decay_steps,
decay_rate=decay_rate,
staircase=staircase)
layers.increment(global_step, 1.0) layers.increment(global_step, 1.0)
place = fluid.CPUPlace() place = fluid.CPUPlace()
...@@ -79,31 +97,52 @@ class TestLearningRateDecay(unittest.TestCase): ...@@ -79,31 +97,52 @@ class TestLearningRateDecay(unittest.TestCase):
step_val, lr_val = exe.run(fluid.default_main_program(), step_val, lr_val = exe.run(fluid.default_main_program(),
feed=[], feed=[],
fetch_list=[global_step, decayed_lr]) fetch_list=[global_step, decayed_lr])
python_decayed_lr = python_decay_fn( python_decayed_lr = python_decay_fn(global_step=step, **kwargs)
learning_rate=init_lr,
global_step=step,
decay_steps=decay_steps,
decay_rate=decay_rate,
staircase=staircase)
self.assertAlmostEqual(python_decayed_lr, lr_val[0]) self.assertAlmostEqual(python_decayed_lr, lr_val[0])
def test_decay(self): def test_decay(self):
common_kwargs_true = {
"learning_rate": 1.0,
"decay_steps": 5,
"decay_rate": 0.5,
"staircase": True
}
common_kwargs_false = copy.deepcopy(common_kwargs_true)
common_kwargs_false["staircase"] = False
decay_fns = [ decay_fns = [
(exponential_decay, lr_decay.exponential_decay, True), (exponential_decay, lr_decay.exponential_decay, common_kwargs_true),
(exponential_decay, lr_decay.exponential_decay, False), (exponential_decay, lr_decay.exponential_decay,
(natural_exp_decay, lr_decay.natural_exp_decay, True), common_kwargs_false),
(natural_exp_decay, lr_decay.natural_exp_decay, False), (natural_exp_decay, lr_decay.natural_exp_decay, common_kwargs_true),
(inverse_time_decay, lr_decay.inverse_time_decay, True), (natural_exp_decay, lr_decay.natural_exp_decay,
(inverse_time_decay, lr_decay.inverse_time_decay, False), common_kwargs_false),
(inverse_time_decay, lr_decay.inverse_time_decay,
common_kwargs_true),
(inverse_time_decay, lr_decay.inverse_time_decay,
common_kwargs_false),
(polynomial_decay, lr_decay.polynomial_decay, {
"learning_rate": 1.0,
"decay_steps": 5,
"cycle": True
}),
(polynomial_decay, lr_decay.polynomial_decay, {
"learning_rate": 1.0,
"decay_steps": 5,
"cycle": False
}),
(piecewise_decay, lr_decay.piecewise_decay, {
"boundaries": [3, 6, 9],
"values": [0.1, 0.2, 0.3, 0.4]
}),
] ]
for py_decay_fn, fluid_decay_fn, staircase in decay_fns: for py_decay_fn, fluid_decay_fn, kwargs in decay_fns:
print("decay_fn=" + str(py_decay_fn) + " staircase=" + str( print("decay_fn=" + py_decay_fn.__name__ + " kwargs=" + str(kwargs))
staircase))
main_program = framework.Program() main_program = framework.Program()
startup_program = framework.Program() startup_program = framework.Program()
with framework.program_guard(main_program, startup_program): with framework.program_guard(main_program, startup_program):
self.check_decay(py_decay_fn, fluid_decay_fn, staircase) self.check_decay(py_decay_fn, fluid_decay_fn, kwargs)
if __name__ == '__main__': if __name__ == '__main__':
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册