From f45a82be4ea792099bb543b31cb3d77fc805bcea Mon Sep 17 00:00:00 2001
From: Qiao Longfei
Date: Tue, 6 Mar 2018 13:03:53 +0800
Subject: [PATCH] change learning_rate_decay to learning_rate_scheduler
 (#8583)

* change learning_rate_decay to learning_rate_scheduler

* optimize code

* change nn.cast to tensor.cast
---
 python/paddle/fluid/__init__.py               |  1 -
 python/paddle/fluid/layers/__init__.py        |  2 +
 .../learning_rate_scheduler.py}               | 48 ++++++++++---------
 .../tests/book/test_label_semantic_roles.py   |  2 +-
 ...cay.py => test_learning_rate_scheduler.py} | 23 ++++-----
 5 files changed, 38 insertions(+), 38 deletions(-)
 rename python/paddle/fluid/{learning_rate_decay.py => layers/learning_rate_scheduler.py} (85%)
 rename python/paddle/fluid/tests/unittests/{test_learning_rate_decay.py => test_learning_rate_scheduler.py} (86%)

diff --git a/python/paddle/fluid/__init__.py b/python/paddle/fluid/__init__.py
index 39d13d3ab5f..3f407d05768 100644
--- a/python/paddle/fluid/__init__.py
+++ b/python/paddle/fluid/__init__.py
@@ -26,7 +26,6 @@ import initializer
 import layers
 import nets
 import optimizer
-import learning_rate_decay
 import backward
 import regularizer
 from param_attr import ParamAttr, WeightNormParamAttr
diff --git a/python/paddle/fluid/layers/__init__.py b/python/paddle/fluid/layers/__init__.py
index 906a16a49f7..14d33582f41 100644
--- a/python/paddle/fluid/layers/__init__.py
+++ b/python/paddle/fluid/layers/__init__.py
@@ -28,6 +28,7 @@ import math_op_patch
 from math_op_patch import *
 import detection
 from detection import *
+from learning_rate_scheduler import *

 __all__ = []
 __all__ += math_op_patch.__all__
@@ -38,3 +39,4 @@ __all__ += control_flow.__all__
 __all__ += ops.__all__
 __all__ += device.__all__
 __all__ += detection.__all__
+__all__ += learning_rate_scheduler.__all__
diff --git a/python/paddle/fluid/learning_rate_decay.py b/python/paddle/fluid/layers/learning_rate_scheduler.py
similarity index 85%
rename from python/paddle/fluid/learning_rate_decay.py
rename to python/paddle/fluid/layers/learning_rate_scheduler.py
index 631efa04874..65b95a58d65 100644
--- a/python/paddle/fluid/learning_rate_decay.py
+++ b/python/paddle/fluid/layers/learning_rate_scheduler.py
@@ -12,8 +12,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-import layers
-from initializer import init_on_cpu
+import control_flow
+import nn
+import ops
+import tensor
+from ..initializer import init_on_cpu

 __all__ = [
     'exponential_decay', 'natural_exp_decay', 'inverse_time_decay',
@@ -31,9 +34,9 @@ strategy according to this module.

 def _decay_step_counter():
     # the first global step is zero in learning rate decay
-    global_step = layers.autoincreased_step_counter(
+    global_step = nn.autoincreased_step_counter(
         counter_name='@LR_DECAY_COUNTER@', begin=0, step=1)
-    global_step = layers.cast(global_step, 'float32')
+    global_step = tensor.cast(global_step, 'float32')
     return global_step


@@ -60,7 +63,7 @@ def exponential_decay(learning_rate, decay_steps, decay_rate, staircase=False):
         # update learning_rate
         div_res = global_step / decay_steps
         if staircase:
-            div_res = layers.floor(x=div_res)
+            div_res = ops.floor(div_res)
         decayed_lr = learning_rate * (decay_rate**div_res)

     return decayed_lr
@@ -89,8 +92,8 @@ def natural_exp_decay(learning_rate, decay_steps, decay_rate, staircase=False):
     with init_on_cpu():
         div_res = global_step / decay_steps
         if staircase:
-            div_res = layers.floor(x=div_res)
-        decayed_lr = learning_rate * layers.exp(x=(-1 * decay_rate * div_res))
+            div_res = ops.floor(div_res)
+        decayed_lr = learning_rate * ops.exp(-1 * decay_rate * div_res)

     return decayed_lr

@@ -118,7 +121,7 @@ def inverse_time_decay(learning_rate, decay_steps, decay_rate, staircase=False):
     with init_on_cpu():
         div_res = global_step / decay_steps
         if staircase:
-            div_res = layers.floor(x=div_res)
+            div_res = ops.floor(div_res)

         decayed_lr = learning_rate / (1 + decay_rate * div_res)

@@ -154,21 +157,20 @@ def polynomial_decay(learning_rate,

     with init_on_cpu():
         if cycle:
-            div_res = layers.ceil(x=(global_step / decay_steps))
-            zero_var = layers.fill_constant(
+            div_res = ops.ceil(global_step / decay_steps)
+            zero_var = tensor.fill_constant(
                 shape=[1], dtype='float32', value=0.0)
-            one_var = layers.fill_constant(
+            one_var = tensor.fill_constant(
                 shape=[1], dtype='float32', value=1.0)
-            with layers.Switch() as switch:
+            with control_flow.Switch() as switch:
                 with switch.case(global_step == zero_var):
-                    layers.assign(input=one_var, output=div_res)
+                    tensor.assign(input=one_var, output=div_res)
             decay_steps = decay_steps * div_res
         else:
-            decay_steps_var = layers.fill_constant(
+            decay_steps_var = tensor.fill_constant(
                 shape=[1], dtype='float32', value=float(decay_steps))
-            global_step = layers.elementwise_min(
-                x=global_step, y=decay_steps_var)
+            global_step = ops.elementwise_min(x=global_step, y=decay_steps_var)

         decayed_lr = (learning_rate - end_learning_rate) * \
             ((1 - global_step / decay_steps) ** power) + end_learning_rate

@@ -195,26 +197,26 @@ def piecewise_decay(boundaries, values):
     global_step = _decay_step_counter()

     with init_on_cpu():
-        lr = layers.create_global_var(
+        lr = tensor.create_global_var(
             shape=[1],
             value=0.0,
             dtype='float32',
             persistable=True,
             name="learning_rate")

-        with layers.Switch() as switch:
+        with control_flow.Switch() as switch:
             for i in range(len(boundaries)):
-                boundary_val = layers.fill_constant(
+                boundary_val = tensor.fill_constant(
                     shape=[1], dtype='float32', value=float(boundaries[i]))
-                value_var = layers.fill_constant(
+                value_var = tensor.fill_constant(
                     shape=[1], dtype='float32', value=float(values[i]))
                 with switch.case(global_step < boundary_val):
-                    layers.assign(value_var, lr)
-            last_value_var = layers.fill_constant(
+                    tensor.assign(value_var, lr)
+            last_value_var = tensor.fill_constant(
                 shape=[1],
                 dtype='float32',
                 value=float(values[len(values) - 1]))
             with switch.default():
-                layers.assign(last_value_var, lr)
+                tensor.assign(last_value_var, lr)

     return lr
diff --git a/python/paddle/fluid/tests/book/test_label_semantic_roles.py b/python/paddle/fluid/tests/book/test_label_semantic_roles.py
index 5c6374b9317..f488527e0bc 100644
--- a/python/paddle/fluid/tests/book/test_label_semantic_roles.py
+++ b/python/paddle/fluid/tests/book/test_label_semantic_roles.py
@@ -170,7 +170,7 @@ def train(use_cuda, save_dirname=None, is_local=True):
     # TODO(qiao)
     # check other optimizers and check why out will be NAN
     sgd_optimizer = fluid.optimizer.SGD(
-        learning_rate=fluid.learning_rate_decay.exponential_decay(
+        learning_rate=fluid.layers.exponential_decay(
             learning_rate=0.0001,
             decay_steps=100000,
             decay_rate=0.5,
diff --git a/python/paddle/fluid/tests/unittests/test_learning_rate_decay.py b/python/paddle/fluid/tests/unittests/test_learning_rate_scheduler.py
similarity index 86%
rename from python/paddle/fluid/tests/unittests/test_learning_rate_decay.py
rename to python/paddle/fluid/tests/unittests/test_learning_rate_scheduler.py
index 5c221a0325b..ab25bfffaa4 100644
--- a/python/paddle/fluid/tests/unittests/test_learning_rate_decay.py
+++ b/python/paddle/fluid/tests/unittests/test_learning_rate_scheduler.py
@@ -17,8 +17,8 @@ import math
 import unittest

 import paddle.fluid as fluid
+import paddle.fluid.layers as layers
 import paddle.fluid.framework as framework
-import paddle.fluid.learning_rate_decay as lr_decay


 def exponential_decay(learning_rate,
@@ -111,27 +111,24 @@ class TestLearningRateDecay(unittest.TestCase):
         common_kwargs_false["staircase"] = False

         decay_fns = [
-            (exponential_decay, lr_decay.exponential_decay, common_kwargs_true),
-            (exponential_decay, lr_decay.exponential_decay,
+            (exponential_decay, layers.exponential_decay, common_kwargs_true),
+            (exponential_decay, layers.exponential_decay,
              common_kwargs_false),
+            (natural_exp_decay, layers.natural_exp_decay, common_kwargs_true),
+            (natural_exp_decay, layers.natural_exp_decay, common_kwargs_false),
+            (inverse_time_decay, layers.inverse_time_decay, common_kwargs_true),
+            (inverse_time_decay, layers.inverse_time_decay, common_kwargs_false),
-            (natural_exp_decay, lr_decay.natural_exp_decay, common_kwargs_true),
-            (natural_exp_decay, lr_decay.natural_exp_decay,
-             common_kwargs_false),
-            (inverse_time_decay, lr_decay.inverse_time_decay,
-             common_kwargs_true),
-            (inverse_time_decay, lr_decay.inverse_time_decay,
-             common_kwargs_false),
-            (polynomial_decay, lr_decay.polynomial_decay, {
+            (polynomial_decay, layers.polynomial_decay, {
                 "learning_rate": 1.0,
                 "decay_steps": 5,
                 "cycle": True
             }),
-            (polynomial_decay, lr_decay.polynomial_decay, {
+            (polynomial_decay, layers.polynomial_decay, {
                 "learning_rate": 1.0,
                 "decay_steps": 5,
                 "cycle": False
             }),
-            (piecewise_decay, lr_decay.piecewise_decay, {
+            (piecewise_decay, layers.piecewise_decay, {
                 "boundaries": [3, 6, 9],
                 "values": [0.1, 0.2, 0.3, 0.4]
             }),
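
Usage note (appended for reference, not part of the patch): after this change the schedulers are reached through fluid.layers instead of the removed fluid.learning_rate_decay module, and the decayed learning-rate variable is handed straight to an optimizer, exactly as the updated test_label_semantic_roles.py does. Below is a minimal sketch against the pre-1.0 fluid API of this commit; the toy regression network, feed names, batch size, and the staircase=True flag are illustrative assumptions, not code taken from the patch.

    import numpy

    import paddle.fluid as fluid

    # Toy network (illustrative assumption): one fully-connected layer
    # doing least-squares regression on random data.
    x = fluid.layers.data(name='x', shape=[13], dtype='float32')
    y = fluid.layers.data(name='y', shape=[1], dtype='float32')
    y_predict = fluid.layers.fc(input=x, size=1)
    cost = fluid.layers.square_error_cost(input=y_predict, label=y)
    avg_cost = fluid.layers.mean(x=cost)

    # The scheduler now lives under fluid.layers; its output variable
    # is passed to the optimizer as the learning rate.
    sgd_optimizer = fluid.optimizer.SGD(
        learning_rate=fluid.layers.exponential_decay(
            learning_rate=0.0001,
            decay_steps=100000,
            decay_rate=0.5,
            staircase=True))
    sgd_optimizer.minimize(avg_cost)

    # The other schedulers exported by layers/learning_rate_scheduler.py
    # follow the same pattern, e.g. (kwargs as in the unit test):
    #   fluid.layers.piecewise_decay(boundaries=[3, 6, 9],
    #                                values=[0.1, 0.2, 0.3, 0.4])

    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())
    loss, = exe.run(fluid.default_main_program(),
                    feed={'x': numpy.random.rand(8, 13).astype('float32'),
                          'y': numpy.random.rand(8, 1).astype('float32')},
                    fetch_list=[avg_cost])

Because _decay_step_counter builds on nn.autoincreased_step_counter with counter_name='@LR_DECAY_COUNTER@', the step counter advances on every execution of the main program, so each call to exe.run moves the schedule forward with no Python-side bookkeeping.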