diff --git a/python/paddle/v2/framework/optimizer.py b/python/paddle/v2/framework/optimizer.py
index e9d8bbab8662ed9e9db1320c89d6db03360d3983..4c608f96bdf0ca715fc89c0752e891f8c2b80d87 100644
--- a/python/paddle/v2/framework/optimizer.py
+++ b/python/paddle/v2/framework/optimizer.py
@@ -18,7 +18,8 @@ class Optimizer(object):
     but need to use one of it's implementation.
     """
 
-    def __init__(self):
+    def __init__(self, global_step=None):
+        self._global_step = global_step
         # Dictionary of accumulators. Some optimizer subclasses need to
         # allocate and manage extra variables associated with the parameters
         # to train. These variables are called accumulators.
@@ -109,6 +110,26 @@ class Optimizer(object):
                 format(name, param.name))
         return self._accumulators[name][param.name]
 
+    def _increment_global_step(self, block):
+        """Increment the global step by 1 after every iteration
+
+        Args:
+            block: the block in which the loss variable is present
+
+        Returns:
+            list with global_step increment op as its only element
+        """
+        assert isinstance(block, framework.Block)
+        assert self._global_step is not None
+        # create the increment op
+        increment_op = block.append_op(
+            type="increment",
+            inputs={"X": self._global_step},
+            outputs={"Out": self._global_step},
+            attrs={"step": 1.0})
+
+        return increment_op
+
     def create_optimization_pass(self, parameters_and_grads, loss):
         """Add optimization operators to update gradients to variables.
 
@@ -152,6 +173,8 @@ class Optimizer(object):
         if finish_ops is not None:
             return_ops += finish_ops
 
+        if self._global_step is not None:
+            return_ops.append(self._increment_global_step(loss.block))
         return return_ops
 
     def minimize(self, loss, parameter_list=None, no_grad_set=None):
@@ -172,9 +195,9 @@ class SGDOptimizer(Optimizer):
     """ Simple SGD optimizer without any state.
     """
 
-    def __init__(self, learning_rate):
+    def __init__(self, learning_rate, global_step=None):
         assert learning_rate is not None
-        super(SGDOptimizer, self).__init__()
+        super(SGDOptimizer, self).__init__(global_step)
         self.type = "sgd"
         self._learning_rate = learning_rate
 
@@ -215,10 +238,14 @@ class MomentumOptimizer(Optimizer):
     """
     _velocity_acc_str = "velocity"
 
-    def __init__(self, learning_rate, momentum, use_nesterov=False):
+    def __init__(self,
+                 learning_rate,
+                 momentum,
+                 use_nesterov=False,
+                 global_step=None):
         assert learning_rate is not None
         assert momentum is not None
-        super(MomentumOptimizer, self).__init__()
+        super(MomentumOptimizer, self).__init__(global_step)
         self.type = "momentum"
         self._learning_rate = learning_rate
         self._momentum = momentum
@@ -275,10 +302,10 @@ class AdagradOptimizer(Optimizer):
     """
     _moment_acc_str = "moment"
 
-    def __init__(self, learning_rate, epsilon=1.0e-6):
+    def __init__(self, learning_rate, epsilon=1.0e-6, global_step=None):
         assert learning_rate is not None
         assert epsilon is not None
-        super(AdagradOptimizer, self).__init__()
+        super(AdagradOptimizer, self).__init__(global_step)
         self.type = "adagrad"
         self._learning_rate = learning_rate
         self._epsilon = epsilon
@@ -337,12 +364,13 @@ class AdamOptimizer(Optimizer):
                  learning_rate=0.001,
                  beta1=0.9,
                  beta2=0.999,
-                 epsilon=1e-8):
+                 epsilon=1e-8,
+                 global_step=None):
         assert learning_rate is not None
         assert beta1 is not None
         assert beta2 is not None
         assert epsilon is not None
-        super(AdamOptimizer, self).__init__()
+        super(AdamOptimizer, self).__init__(global_step)
         self.type = "adam"
         self._learning_rate = learning_rate
         self._beta1 = beta1
@@ -458,7 +486,8 @@ class AdamaxOptimizer(Optimizer):
                  learning_rate=0.001,
                  beta1=0.9,
                  beta2=0.999,
-                 epsilon=1e-8):
+                 epsilon=1e-8,
+                 global_step=None):
         assert learning_rate is not None
         assert beta1 is not None
         assert beta2 is not None
diff --git a/python/paddle/v2/framework/tests/test_optimizer.py b/python/paddle/v2/framework/tests/test_optimizer.py
index 6dfd94e8c8c96d87037faa028a3d2a537a90c9c7..45396c9bec9ccf0668b048b2b4855d7a665ebea5 100644
--- a/python/paddle/v2/framework/tests/test_optimizer.py
+++ b/python/paddle/v2/framework/tests/test_optimizer.py
@@ -27,6 +27,32 @@ class TestOptimizer(unittest.TestCase):
         sgd_op = opts[0]
         self.assertEqual(sgd_op.type, "sgd")
 
+    def test_sgd_optimizer_with_global_step(self):
+        program = framework.Program()
+        block = program.global_block()
+        mul_x = block.create_parameter(
+            dtype="float32", shape=[5, 10], lod_level=0, name="mul.x")
+        mul_y = block.create_var(
+            dtype="float32", shape=[10, 8], lod_level=0, name="mul.y")
+        mul_out = block.create_var(
+            dtype="float32", shape=[5, 8], lod_level=0, name="mul.out")
+        block.append_op(
+            type="mul",
+            inputs={"X": mul_x,
+                    "Y": mul_y},
+            outputs={"Out": mul_out},
+            attrs={"x_num_col_dims": 1})
+        global_step = block.create_var(
+            dtype="float32", shape=[1], lod_level=0, name="step")
+        sgd_optimizer = optimizer.SGDOptimizer(
+            learning_rate=0.01, global_step=global_step)
+        opts = sgd_optimizer.minimize(mul_out)
+        self.assertEqual(len(opts), 2)
+        sgd_op = opts[0]
+        self.assertEqual(sgd_op.type, "sgd")
+        increment_op = opts[1]
+        self.assertEqual(increment_op.type, "increment")
+
 
 class TestMomentumOptimizer(unittest.TestCase):
     class MockMomentum(optimizer.MomentumOptimizer):