From 5cc2f0bdda6038ed914892152c8ab0ab0404aa2d Mon Sep 17 00:00:00 2001
From: Qiao Longfei
Date: Thu, 8 Feb 2018 11:21:59 +0800
Subject: [PATCH] Add polynomial_decay and piecewise_decay (#8013)

* init polynomial_decay

* test polynomial_decay

* complete polynomial_decay

* fix conditional block op

* init scalar-switch-case-op

* switch op can compile

* complete forward switch_op

* add GetMatchCaseIndex

* add switch_grad_op

* init switch Python API

* add test_switch

* support set block list in python

* fix scope problem

* complete test

* optimize test

* optimize test

* rm backward part

* clear grad op

* polynomial_decay use switch op

* revert conditional_block_op and reshape_op

* add piecewise_decay and test

* fix piecewise_decay

* try to use condition op for switch

* can work

* clean old code

* revert

* rm switch_op.cc

* optimize code

* add attr is_scalar_condition for condition_block_op

* fix comment

* fix comment

* add export
---
Review notes (this section is ignored when the patch is applied):
  - Line breaks and indentation were reconstructed from a whitespace-collapsed
    copy of this patch; context-line whitespace may differ from the target
    tree, so apply with whitespace tolerance if a hunk is rejected.
  - polynomial_decay: the ValueError message named inverse_time_decay; fixed.
  - equal: the docstring example assigned the result to `less`; renamed.
  - piecewise_decay: Args/Returns/Raises added to the docstring (+13 lines,
    hunk header and diffstat updated accordingly).
  - The `index` blob hashes below still describe the original post-image.

 python/paddle/v2/fluid/layers/control_flow.py |  31 ++++++
 python/paddle/v2/fluid/learning_rate_decay.py | 115 +++++++++++++++++-
 .../fluid/tests/test_learning_rate_decay.py   |  93 +++++++++++-----
 3 files changed, 210 insertions(+), 29 deletions(-)

diff --git a/python/paddle/v2/fluid/layers/control_flow.py b/python/paddle/v2/fluid/layers/control_flow.py
index e71f3858b0a..f29d7712334 100644
--- a/python/paddle/v2/fluid/layers/control_flow.py
+++ b/python/paddle/v2/fluid/layers/control_flow.py
@@ -38,6 +38,7 @@ __all__ = [
     'array_write',
     'create_array',
     'less_than',
+    'equal',
     'array_read',
     'shrink_memory',
     'array_length',
@@ -975,6 +976,36 @@ def less_than(x, y, cond=None, **ignored):
     return cond
 
 
+def equal(x, y, cond=None, **ignored):
+    """
+    **equal**
+
+    This layer returns the truth value of :math:`x == y` elementwise.
+
+    Args:
+        x(Variable): First operand of *equal*
+        y(Variable): Second operand of *equal*
+        cond(Variable|None): Optional output variable to store the result of *equal*
+
+    Returns:
+        Variable: The tensor variable storing the output of *equal*.
+
+    Examples:
+        .. code-block:: python
+
+          cond = fluid.layers.equal(x=label, y=limit)
+    """
+    helper = LayerHelper("equal", **locals())
+    if cond is None:
+        cond = helper.create_tmp_variable(dtype='bool')
+        cond.stop_gradient = True
+
+    helper.append_op(
+        type='equal', inputs={'X': [x],
+                              'Y': [y]}, outputs={'Out': [cond]})
+    return cond
+
+
 def array_read(array, i):
     """This function performs the operation to read the data in as an
     LOD_TENSOR_ARRAY.
diff --git a/python/paddle/v2/fluid/learning_rate_decay.py b/python/paddle/v2/fluid/learning_rate_decay.py
index 96b3e9a0d73..13dc98075f7 100644
--- a/python/paddle/v2/fluid/learning_rate_decay.py
+++ b/python/paddle/v2/fluid/learning_rate_decay.py
@@ -15,7 +15,10 @@
 import layers
 from framework import Variable
 
-__all__ = ['exponential_decay', 'natural_exp_decay', 'inverse_time_decay']
+__all__ = [
+    'exponential_decay', 'natural_exp_decay', 'inverse_time_decay',
+    'polynomial_decay', 'piecewise_decay'
+]
 """
 When training a model, it's often useful to decay the
 learning rate during training process, this is called
@@ -101,7 +104,7 @@ def inverse_time_decay(learning_rate,
     ```python
     if staircase:
         decayed_learning_rate = learning_rate / (1 + decay_rate * floor(global_step / decay_step))
-    else
+    else:
         decayed_learning_rate = learning_rate / (1 + decay_rate * global_step / decay_step)
     ```
     Args:
@@ -123,3 +126,111 @@ def inverse_time_decay(learning_rate,
         div_res = layers.floor(x=div_res)
 
     return learning_rate / (1 + decay_rate * div_res)
+
+
+def polynomial_decay(learning_rate,
+                     global_step,
+                     decay_steps,
+                     end_learning_rate=0.0001,
+                     power=1.0,
+                     cycle=False):
+    """Applies polynomial decay to the initial learning rate.
+
+    ```python
+    if cycle:
+        decay_steps = decay_steps * ceil(global_step / decay_steps)
+    else:
+        global_step = min(global_step, decay_steps)
+    decayed_learning_rate = (learning_rate - end_learning_rate) *
+        (1 - global_step / decay_steps) ^ power +
+        end_learning_rate
+    ```
+    Args:
+        learning_rate: A scalar float32 value or a Variable. This
+          will be the initial learning rate during training
+        global_step: A Variable that record the training step.
+        decay_steps: A Python `int32` number.
+        end_learning_rate: A Python `float` number.
+        power: A Python `float` number
+        cycle: Boolean. If set true, decay the learning rate every decay_steps.
+
+    Returns:
+        The decayed learning rate
+    """
+    if not isinstance(global_step, Variable):
+        raise ValueError("global_step is required for polynomial_decay.")
+
+    if cycle:
+        div_res = layers.ceil(x=(global_step / decay_steps))
+        zero_var = layers.fill_constant(shape=[1], dtype='float32', value=0.0)
+        one_var = layers.fill_constant(shape=[1], dtype='float32', value=1.0)
+
+        with layers.Switch() as switch:
+            with switch.case(layers.equal(x=global_step, y=zero_var)):
+                layers.assign(input=one_var, output=div_res)
+        decay_steps = decay_steps * div_res
+    else:
+        decay_steps_var = layers.fill_constant(
+            shape=[1], dtype='float32', value=float(decay_steps))
+        global_step = layers.elementwise_min(x=global_step, y=decay_steps_var)
+
+    return (learning_rate - end_learning_rate) * \
+           ((1 - global_step / decay_steps) ** power) + end_learning_rate
+
+
+def piecewise_decay(global_step, boundaries, values):
+    """Applies piecewise decay to the initial learning rate.
+
+    ```python
+    boundaries = [10000, 20000]
+    values = [1.0, 0.5, 0.1]
+
+    if step < 10000:
+        learning_rate = 1.0
+    elif step >= 10000 and step < 20000:
+        learning_rate = 0.5
+    else:
+        learning_rate = 0.1
+    ```
+
+    Args:
+        global_step: A Variable that records the training step.
+        boundaries: A list of numbers, the step boundaries.
+        values: A list of learning rates, one per interval; it must hold
+            exactly one more element than `boundaries`.
+
+    Returns:
+        The global variable `lr` that the selected learning rate is assigned to.
+
+    Raises:
+        ValueError: if `len(values) - len(boundaries) != 1`, or if
+            `global_step` is not a Variable.
+    """
+
+    if len(values) - len(boundaries) != 1:
+        raise ValueError("len(values) - len(boundaries) should be 1")
+
+    if not isinstance(global_step, Variable):
+        raise ValueError("global_step is required for piecewise_decay.")
+
+    lr = layers.create_global_var(
+        shape=[1],
+        value=0.0,
+        dtype='float32',
+        persistable=True,
+        name="learning_rate")
+
+    with layers.Switch() as switch:
+        for i in range(len(boundaries)):
+            boundary_val = layers.fill_constant(
+                shape=[1], dtype='float32', value=float(boundaries[i]))
+            value_var = layers.fill_constant(
+                shape=[1], dtype='float32', value=float(values[i]))
+            with switch.case(layers.less_than(global_step, boundary_val)):
+                layers.assign(value_var, lr)
+        last_value_var = layers.fill_constant(
+            shape=[1], dtype='float32', value=float(values[len(values) - 1]))
+        with switch.default():
+            layers.assign(last_value_var, lr)
+
+    return lr
diff --git a/python/paddle/v2/fluid/tests/test_learning_rate_decay.py b/python/paddle/v2/fluid/tests/test_learning_rate_decay.py
index dc348cf2d21..1d6bab3d6c4 100644
--- a/python/paddle/v2/fluid/tests/test_learning_rate_decay.py
+++ b/python/paddle/v2/fluid/tests/test_learning_rate_decay.py
@@ -15,6 +15,8 @@
 import unittest
 
 import math
+import copy
+
 import paddle.v2.fluid.framework as framework
 import paddle.v2.fluid as fluid
 import paddle.v2.fluid.layers as layers
@@ -54,21 +56,37 @@ def inverse_time_decay(learning_rate,
     return learning_rate / (1 + decay_rate * temp)
 
 
-class TestLearningRateDecay(unittest.TestCase):
-    def check_decay(self, python_decay_fn, fluid_decay_fn, staircase):
-        init_lr = 1.0
-        decay_steps = 5
-        decay_rate = 0.5
+def polynomial_decay(learning_rate,
+                     global_step,
+                     decay_steps,
+                     end_learning_rate=0.0001,
+                     power=1.0,
+                     cycle=False):
+    if cycle:
+        div = math.ceil(global_step / float(decay_steps))
+        if div == 0:
+            div = 1
+        decay_steps = decay_steps * div
+    else:
+        global_step = min(global_step, decay_steps)
+    return (learning_rate - end_learning_rate) * \
+           ((1 - float(global_step) / float(decay_steps)) ** power) + end_learning_rate
+
+
+def piecewise_decay(global_step, boundaries, values):
+    assert len(boundaries) + 1 == len(values)
+    for i in range(len(boundaries)):
+        if global_step < boundaries[i]:
+            return values[i]
+    return values[len(values) - 1]
+
 
+class TestLearningRateDecay(unittest.TestCase):
+    def check_decay(self, python_decay_fn, fluid_decay_fn, kwargs):
         global_step = layers.create_global_var(
             shape=[1], value=0.0, dtype='float32', persistable=True)
 
-        decayed_lr = fluid_decay_fn(
-            learning_rate=init_lr,
-            global_step=global_step,
-            decay_steps=decay_steps,
-            decay_rate=decay_rate,
-            staircase=staircase)
+        decayed_lr = fluid_decay_fn(global_step=global_step, **kwargs)
         layers.increment(global_step, 1.0)
 
         place = fluid.CPUPlace()
@@ -79,31 +97,52 @@ class TestLearningRateDecay(unittest.TestCase):
             step_val, lr_val = exe.run(fluid.default_main_program(),
                                        feed=[],
                                        fetch_list=[global_step, decayed_lr])
-            python_decayed_lr = python_decay_fn(
-                learning_rate=init_lr,
-                global_step=step,
-                decay_steps=decay_steps,
-                decay_rate=decay_rate,
-                staircase=staircase)
+            python_decayed_lr = python_decay_fn(global_step=step, **kwargs)
             self.assertAlmostEqual(python_decayed_lr, lr_val[0])
 
     def test_decay(self):
+        common_kwargs_true = {
+            "learning_rate": 1.0,
+            "decay_steps": 5,
+            "decay_rate": 0.5,
+            "staircase": True
+        }
+        common_kwargs_false = copy.deepcopy(common_kwargs_true)
+        common_kwargs_false["staircase"] = False
+
         decay_fns = [
-            (exponential_decay, lr_decay.exponential_decay, True),
-            (exponential_decay, lr_decay.exponential_decay, False),
-            (natural_exp_decay, lr_decay.natural_exp_decay, True),
-            (natural_exp_decay, lr_decay.natural_exp_decay, False),
-            (inverse_time_decay, lr_decay.inverse_time_decay, True),
-            (inverse_time_decay, lr_decay.inverse_time_decay, False),
+            (exponential_decay, lr_decay.exponential_decay, common_kwargs_true),
+            (exponential_decay, lr_decay.exponential_decay,
+             common_kwargs_false),
+            (natural_exp_decay, lr_decay.natural_exp_decay, common_kwargs_true),
+            (natural_exp_decay, lr_decay.natural_exp_decay,
+             common_kwargs_false),
+            (inverse_time_decay, lr_decay.inverse_time_decay,
+             common_kwargs_true),
+            (inverse_time_decay, lr_decay.inverse_time_decay,
+             common_kwargs_false),
+            (polynomial_decay, lr_decay.polynomial_decay, {
+                "learning_rate": 1.0,
+                "decay_steps": 5,
+                "cycle": True
+            }),
+            (polynomial_decay, lr_decay.polynomial_decay, {
+                "learning_rate": 1.0,
+                "decay_steps": 5,
+                "cycle": False
+            }),
+            (piecewise_decay, lr_decay.piecewise_decay, {
+                "boundaries": [3, 6, 9],
+                "values": [0.1, 0.2, 0.3, 0.4]
+            }),
         ]
 
-        for py_decay_fn, fluid_decay_fn, staircase in decay_fns:
-            print("decay_fn=" + str(py_decay_fn) + " staircase=" + str(
-                staircase))
+        for py_decay_fn, fluid_decay_fn, kwargs in decay_fns:
+            print("decay_fn=" + py_decay_fn.__name__ + " kwargs=" + str(kwargs))
             main_program = framework.Program()
             startup_program = framework.Program()
             with framework.program_guard(main_program, startup_program):
-                self.check_decay(py_decay_fn, fluid_decay_fn, staircase)
+                self.check_decay(py_decay_fn, fluid_decay_fn, kwargs)
 
 
 if __name__ == '__main__':
-- 
GitLab