Add polynomial_decay and piecewise_decay (#8013)

* init polynomial_decay * test polynomial_decay * complete polynomial_decay * fix conditional block op * init scalar-switch-case-op * switch op can compile * complete forward switch_op * add GetMatchCaseIndex * add switch_grad_op * init switch Python API * add test_switch * support set block list in python * fix scope problem * complete test * optimize test * optimize test * rm backward part * clear grad op * polynomial_decay use switch op * revert conditional_block_op and reshape_op * add piecewise_decay and test * fix piecewise_decay * try to use condition op for switch * can work * clean old code * revert * rm switch_op.cc * optimize code * add attr is_scalar_condition for condition_block_op * fix comment * fix comment * add export

Add polynomial_decay and piecewise_decay (#8013)
* init polynomial_decay * test polynomial_decay * complete polynomial_decay * fix conditional block op * init scalar-switch-case-op * switch op can compile * complete forward switch_op * add GetMatchCaseIndex * add switch_grad_op * init switch Python API * add test_switch * support set block list in python * fix scope problem * complete test * optimize test * optimize test * rm backward part * clear grad op * polynomial_decay use switch op * revert conditional_block_op and reshape_op * add piecewise_decay and test * fix piecewise_decay * try to use condition op for switch * can work * clean old code * revert * rm switch_op.cc * optimize code * add attr is_scalar_condition for condition_block_op * fix comment * fix comment * add export
5cc2f0bd · Qiao Longfei · GitHub · 6612068e · 5cc2f0bd · 5cc2f0bd
3 changed files
--- a/python/paddle/v2/fluid/layers/control_flow.py
+++ b/python/paddle/v2/fluid/layers/control_flow.py
@@ -38,6 +38,7 @@ __all__ = [
    'array_write',
    'create_array',
    'less_than',
+    'equal',
    'array_read',
    'shrink_memory',
    'array_length',
@@ -975,6 +976,36 @@ def less_than(x, y, cond=None, **ignored):
    return cond
+def equal(x, y, cond=None, **ignored):
+    """
+    **equal**
+    This layer returns the truth value of :math:`x == y` elementwise.
+    Args:
+        x(Variable): First operand of *equal*
+        y(Variable): Second operand of *equal*
+        cond(Variable|None): Optional output variable to store the result of
+            *equal*. If None, a temporary variable of dtype 'bool' with
+            ``stop_gradient = True`` is created and used as the output.
+        ignored: Extra keyword arguments. This function does not read them
+            itself; they only reach LayerHelper through ``locals()``.
+    Returns:
+        Variable: The tensor variable storing the output of *equal*.
+    Examples:
+        .. code-block:: python
+          is_equal = fluid.layers.equal(x=label, y=limit)
+    """
+    # locals() is evaluated before any other local name is bound, so the
+    # helper receives exactly x, y, cond and ignored as keyword arguments.
+    helper = LayerHelper("equal", **locals())
+    if cond is None:
+        # No output variable supplied: create one and exclude it from
+        # gradient computation.
+        cond = helper.create_tmp_variable(dtype='bool')
+        cond.stop_gradient = True
+    helper.append_op(
+        type='equal', inputs={'X': [x],
+                              'Y': [y]}, outputs={'Out': [cond]})
+    return cond
 def array_read(array, i):
    """This function performs the operation to read the data in as an
    LOD_TENSOR_ARRAY.

--- a/python/paddle/v2/fluid/learning_rate_decay.py
+++ b/python/paddle/v2/fluid/learning_rate_decay.py
@@ -15,7 +15,10 @@
 import layers
 from framework import Variable
-__all__ = ['exponential_decay', 'natural_exp_decay', 'inverse_time_decay']
+__all__ = [
+    'exponential_decay', 'natural_exp_decay', 'inverse_time_decay',
+    'polynomial_decay', 'piecewise_decay'
+]
 """
 When training a model, it's often useful to decay the
 learning rate during training process, this is called
@@ -101,7 +104,7 @@ def inverse_time_decay(learning_rate,
    ```python
    if staircase:
      decayed_learning_rate = learning_rate / (1 + decay_rate * floor(global_step / decay_step))
-    else
+    else:
      decayed_learning_rate = learning_rate / (1 + decay_rate * global_step / decay_step)
    ```
    Args:
@@ -123,3 +126,98 @@ def inverse_time_decay(learning_rate,
        div_res = layers.floor(x=div_res)
    return learning_rate / (1 + decay_rate * div_res)
+def polynomial_decay(learning_rate,
+                     global_step,
+                     decay_steps,
+                     end_learning_rate=0.0001,
+                     power=1.0,
+                     cycle=False):
+    """Applies polynomial decay to the initial learning rate.
+    ```python
+    if cycle:
+        # ceil(...) is treated as 1 when global_step is 0
+        decay_steps = decay_steps * ceil(global_step / decay_steps)
+    else:
+        global_step = min(global_step, decay_steps)
+    decayed_learning_rate = (learning_rate - end_learning_rate) *
+                      (1 - global_step / decay_steps) ^ power +
+                      end_learning_rate
+    ```
+    Args:
+        learning_rate: A scalar float32 value or a Variable. This
+          will be the initial learning rate during training
+        global_step: A Variable that records the training step.
+        decay_steps: A Python `int32` number.
+        end_learning_rate: A Python `float` number.
+        power: A Python `float` number
+        cycle: Boolean. If set true, decay the learning rate every decay_steps.
+    Returns:
+        The decayed learning rate
+    Raises:
+        ValueError: If global_step is not a Variable.
+    """
+    if not isinstance(global_step, Variable):
+        raise ValueError("global_step is required for polynomial_decay.")
+    if cycle:
+        div_res = layers.ceil(x=(global_step / decay_steps))
+        zero_var = layers.fill_constant(shape=[1], dtype='float32', value=0.0)
+        one_var = layers.fill_constant(shape=[1], dtype='float32', value=1.0)
+        # At step 0 the ceil above yields 0, which would make decay_steps 0
+        # and the division below undefined; force the multiplier to 1 then.
+        with layers.Switch() as switch:
+            with switch.case(layers.equal(x=global_step, y=zero_var)):
+                layers.assign(input=one_var, output=div_res)
+        decay_steps = decay_steps * div_res
+    else:
+        # Clamp the step so the rate stays at end_learning_rate once
+        # decay_steps has been reached.
+        decay_steps_var = layers.fill_constant(
+            shape=[1], dtype='float32', value=float(decay_steps))
+        global_step = layers.elementwise_min(x=global_step, y=decay_steps_var)
+    return (learning_rate - end_learning_rate) * \
+           ((1 - global_step / decay_steps) ** power) + end_learning_rate
+def piecewise_decay(global_step, boundaries, values):
+    """Selects a learning rate according to the interval the step falls in.
+    ```python
+    boundaries = [10000, 20000]
+    values = [1.0, 0.5, 0.1]
+    if step < 10000:
+        learning_rate = 1.0
+    elif step >= 10000 and step < 20000:
+        learning_rate = 0.5
+    else:
+        learning_rate = 0.1
+    ```
+    Args:
+        global_step: A Variable that records the training step.
+        boundaries: A list of step numbers at which the rate changes.
+        values: A list of learning rates; must hold exactly one more
+          entry than boundaries.
+    Returns:
+        The global Variable that the selected learning rate is assigned to.
+    Raises:
+        ValueError: If the lengths of values and boundaries do not differ
+          by one, or if global_step is not a Variable.
+    """
+    if len(values) != len(boundaries) + 1:
+        raise ValueError("len(values) - len(boundaries) should be 1")
+    if not isinstance(global_step, Variable):
+        raise ValueError("global_step is required for piecewise_decay.")
+    def _scalar(number):
+        # One-element float32 constant holding `number`.
+        return layers.fill_constant(
+            shape=[1], dtype='float32', value=float(number))
+    lr = layers.create_global_var(
+        shape=[1],
+        value=0.0,
+        dtype='float32',
+        persistable=True,
+        name="learning_rate")
+    with layers.Switch() as switch:
+        # zip stops at the shorter list, so the final entry of values is
+        # left for the default branch.
+        for boundary, rate in zip(boundaries, values):
+            upper_bound = _scalar(boundary)
+            rate_var = _scalar(rate)
+            with switch.case(layers.less_than(global_step, upper_bound)):
+                layers.assign(rate_var, lr)
+        final_rate_var = _scalar(values[-1])
+        with switch.default():
+            layers.assign(final_rate_var, lr)
+    return lr
--- a/python/paddle/v2/fluid/tests/test_learning_rate_decay.py
+++ b/python/paddle/v2/fluid/tests/test_learning_rate_decay.py
@@ -15,6 +15,8 @@
 import unittest
 import math
+import copy
 import paddle.v2.fluid.framework as framework
 import paddle.v2.fluid as fluid
 import paddle.v2.fluid.layers as layers
@@ -54,21 +56,37 @@ def inverse_time_decay(learning_rate,
    return learning_rate / (1 + decay_rate * temp)
-class TestLearningRateDecay(unittest.TestCase):
+def polynomial_decay(learning_rate,
-    def check_decay(self, python_decay_fn, fluid_decay_fn, staircase):
+                     global_step,
-        init_lr = 1.0
+                     decay_steps,
-        decay_steps = 5
+                     end_learning_rate=0.0001,
-        decay_rate = 0.5
+                     power=1.0,
+                     cycle=False):
+    """Pure-Python polynomial decay on plain numbers.
+    Serves as the expected value that the fluid polynomial_decay result is
+    compared against in test_decay.
+    """
+    if cycle:
+        div = math.ceil(global_step / float(decay_steps))
+        # At step 0 the ceil is 0; use 1 so decay_steps does not become 0.
+        if div == 0:
+            div = 1
+        decay_steps = decay_steps * div
+    else:
+        global_step = min(global_step, decay_steps)
+    return (learning_rate - end_learning_rate) * \
+           ((1 - float(global_step) / float(decay_steps)) ** power) + end_learning_rate
+def piecewise_decay(global_step, boundaries, values):
+    """Pure-Python piecewise-constant decay on plain numbers.
+    Returns the value paired with the first boundary that global_step is
+    below, or the last value when the step is past every boundary.
+    """
+    assert len(values) == len(boundaries) + 1
+    for upper_bound, rate in zip(boundaries, values):
+        if global_step < upper_bound:
+            return rate
+    return values[-1]
+class TestLearningRateDecay(unittest.TestCase):
+    def check_decay(self, python_decay_fn, fluid_decay_fn, kwargs):
        global_step = layers.create_global_var(
            shape=[1], value=0.0, dtype='float32', persistable=True)
-        decayed_lr = fluid_decay_fn(
+        decayed_lr = fluid_decay_fn(global_step=global_step, **kwargs)
-            learning_rate=init_lr,
-            global_step=global_step,
-            decay_steps=decay_steps,
-            decay_rate=decay_rate,
-            staircase=staircase)
        layers.increment(global_step, 1.0)
        place = fluid.CPUPlace()
@@ -79,31 +97,52 @@ class TestLearningRateDecay(unittest.TestCase):
            step_val, lr_val = exe.run(fluid.default_main_program(),
                                       feed=[],
                                       fetch_list=[global_step, decayed_lr])
-            python_decayed_lr = python_decay_fn(
+            python_decayed_lr = python_decay_fn(global_step=step, **kwargs)
-                learning_rate=init_lr,
-                global_step=step,
-                decay_steps=decay_steps,
-                decay_rate=decay_rate,
-                staircase=staircase)
            self.assertAlmostEqual(python_decayed_lr, lr_val[0])
    def test_decay(self):
+        """Run check_decay for every (python reference, fluid) decay pair.
+        Each entry carries the keyword arguments passed to both
+        implementations.
+        """
+        common_kwargs_true = {
+            "learning_rate": 1.0,
+            "decay_steps": 5,
+            "decay_rate": 0.5,
+            "staircase": True
+        }
+        common_kwargs_false = copy.deepcopy(common_kwargs_true)
+        common_kwargs_false["staircase"] = False
        decay_fns = [
-            (exponential_decay, lr_decay.exponential_decay, True),
+            (exponential_decay, lr_decay.exponential_decay, common_kwargs_true),
-            (exponential_decay, lr_decay.exponential_decay, False),
+            (exponential_decay, lr_decay.exponential_decay,
-            (natural_exp_decay, lr_decay.natural_exp_decay, True),
+             common_kwargs_false),
-            (natural_exp_decay, lr_decay.natural_exp_decay, False),
+            (natural_exp_decay, lr_decay.natural_exp_decay, common_kwargs_true),
-            (inverse_time_decay, lr_decay.inverse_time_decay, True),
+            (natural_exp_decay, lr_decay.natural_exp_decay,
-            (inverse_time_decay, lr_decay.inverse_time_decay, False),
+             common_kwargs_false),
+            (inverse_time_decay, lr_decay.inverse_time_decay,
+             common_kwargs_true),
+            (inverse_time_decay, lr_decay.inverse_time_decay,
+             common_kwargs_false),
+            (polynomial_decay, lr_decay.polynomial_decay, {
+                "learning_rate": 1.0,
+                "decay_steps": 5,
+                "cycle": True
+            }),
+            (polynomial_decay, lr_decay.polynomial_decay, {
+                "learning_rate": 1.0,
+                "decay_steps": 5,
+                "cycle": False
+            }),
+            (piecewise_decay, lr_decay.piecewise_decay, {
+                "boundaries": [3, 6, 9],
+                "values": [0.1, 0.2, 0.3, 0.4]
+            }),
        ]
-        for py_decay_fn, fluid_decay_fn, staircase in decay_fns:
+        for py_decay_fn, fluid_decay_fn, kwargs in decay_fns:
-            print("decay_fn=" + str(py_decay_fn) + " staircase=" + str(
+            print("decay_fn=" + py_decay_fn.__name__ + " kwargs=" + str(kwargs))
-                staircase))
+            # Fresh main/startup programs are created for each case.
            main_program = framework.Program()
            startup_program = framework.Program()
            with framework.program_guard(main_program, startup_program):
-                self.check_decay(py_decay_fn, fluid_decay_fn, staircase)
+                self.check_decay(py_decay_fn, fluid_decay_fn, kwargs)
 if __name__ == '__main__':