diff --git a/python/paddle/v2/fluid/optimizer.py b/python/paddle/v2/fluid/optimizer.py
index 4252a6f08509fec92ac5c45d32169232e1dd190f..d2841df6af7a0d860c239db952c767c995d30ba4 100644
--- a/python/paddle/v2/fluid/optimizer.py
+++ b/python/paddle/v2/fluid/optimizer.py
@@ -9,7 +9,7 @@ from paddle.v2.fluid.layer_helper import LayerHelper
 
 __all__ = [
     'SGDOptimizer', 'MomentumOptimizer', 'AdagradOptimizer', 'AdamOptimizer',
-    'AdamaxOptimizer'
+    'AdamaxOptimizer', 'DecayedAdagradOptimizer'
 ]
 
 
@@ -85,7 +85,7 @@ class Optimizer(object):
         """
         if (name in self._accumulators and
                 param.name in self._accumulators[name]):
-            raise Exception("Accumulator {} already exists for parmeter {}".
+            raise Exception("Accumulator {} already exists for parameter {}".
                             format(name, param.name))
 
         assert isinstance(self.helper, LayerHelper)
@@ -307,7 +307,7 @@ class AdagradOptimizer(Optimizer):
         moment_acc = self._get_accumulator(self._moment_acc_str,
                                            param_and_grad[0])
 
-        # create the adagrad optimizer op
+        # Create the adagrad optimizer op
         adagrad_op = block.append_op(
             type=self.type,
             inputs={
@@ -510,3 +510,62 @@ class AdamaxOptimizer(Optimizer):
             attrs={"scale": self._beta1})
 
         return [scale_beta1]
+
+
+class DecayedAdagradOptimizer(Optimizer):
+    """Decayed Adagrad optimizer with one moment accumulator per parameter.
+
+    `_append_optimize_op` appends one "decayed_adagrad" op for the given pair.
+
+    Args:
+        learning_rate: learning rate kept on the optimizer; must not be None.
+        decay: forwarded to the op as its "decay" attribute.
+        epsilon: forwarded to the op as its "epsilon" attribute.
+        global_step: passed through unchanged to Optimizer.__init__.
+    """
+    _moment_acc_str = "moment"
+
+    def __init__(self,
+                 learning_rate,
+                 decay=0.95,
+                 epsilon=1.0e-6,
+                 global_step=None):
+        assert learning_rate is not None
+        assert decay is not None
+        assert epsilon is not None
+
+        super(DecayedAdagradOptimizer, self).__init__(global_step)
+        self.type = "decayed_adagrad"
+        self._learning_rate = learning_rate
+        self._decay = decay
+        self._epsilon = epsilon
+
+    def _create_accumulators(self, block, parameters):
+        assert isinstance(block, framework.Block)
+
+        for p in parameters:
+            self._add_accumulator(self._moment_acc_str, p)
+
+    def _append_optimize_op(self, block, param_and_grad):
+        assert isinstance(block, framework.Block)
+
+        moment_acc = self._get_accumulator(self._moment_acc_str,
+                                           param_and_grad[0])
+
+        # Create the decayed adagrad optimizer op
+        decayed_adagrad_op = block.append_op(
+            type=self.type,
+            inputs={
+                "Param": param_and_grad[0],
+                "Grad": param_and_grad[1],
+                "Moment": moment_acc,
+                "LearningRate": self._create_param_lr(param_and_grad)
+            },
+            outputs={"ParamOut": param_and_grad[0],
+                     "MomentOut": moment_acc},
+            # Forward decay too; without it the constructor's decay argument
+            # is stored on the optimizer but never reaches the op.
+            attrs={"epsilon": self._epsilon,
+                   "decay": self._decay})
+
+        return decayed_adagrad_op
diff --git a/python/paddle/v2/fluid/tests/test_optimizer.py b/python/paddle/v2/fluid/tests/test_optimizer.py
index 0ebf7cdf208c41eacfdff88f59455584eff4ff8f..7b4237e7fdf5990019ddd85967036ceb598c33df 100644
--- a/python/paddle/v2/fluid/tests/test_optimizer.py
+++ b/python/paddle/v2/fluid/tests/test_optimizer.py
@@ -198,7 +198,7 @@ class TestAdagradOptimizer(unittest.TestCase):
         adagrad_op = opts[0]
         self.assertEqual(adagrad_op.type, "adagrad")
 
-        # check accumulators
+        # Check accumulators
         accumulators = adagrad_optimizer.get_accumulators()
         self.assertEqual(len(accumulators), 1)
         self.assertTrue(adagrad_optimizer.get_moment_str() in accumulators)
@@ -331,5 +331,59 @@ class TestAdamaxOptimizer(unittest.TestCase):
         self.assertAlmostEqual(init_ops[0].attr('value'), learning_rate)
 
 
+class TestDecayedAdagradOptimizer(unittest.TestCase):
+    class MockDecayedAdagrad(optimizer.DecayedAdagradOptimizer):
+        def get_accumulators(self):
+            return self._accumulators
+
+        def get_moment_str(self):
+            return self._moment_acc_str
+
+    def test_decayed_adagrad_optimizer(self):
+        init_program = framework.Program()
+        program = framework.Program()
+        block = program.global_block()
+        mul_x = block.create_parameter(
+            dtype="float32", shape=[5, 10], lod_level=0, name="mul.x")
+        mul_y = block.create_var(
+            dtype="float32", shape=[10, 8], lod_level=0, name="mul.y")
+        mul_out = block.create_var(
+            dtype="float32", shape=[5, 8], lod_level=0, name="mul.out")
+        block.append_op(
+            type="mul",
+            inputs={"X": mul_x,
+                    "Y": mul_y},
+            outputs={"Out": mul_out},
+            attrs={"x_num_col_dims": 1})
+        learning_rate = 0.01
+        decayed_adagrad_optimizer = self.MockDecayedAdagrad(
+            learning_rate=learning_rate, decay=0.95, epsilon=1.0e-6)
+        params_grads = append_backward_ops(mul_out)
+        self.assertEqual(len(params_grads), 1)
+        self.assertEqual(len(decayed_adagrad_optimizer.get_accumulators()), 0)
+        opts = decayed_adagrad_optimizer.create_optimization_pass(
+            params_grads, mul_out, init_program)
+        self.assertEqual(len(opts), 1)
+        decayed_adagrad_op = opts[0]
+        self.assertEqual(decayed_adagrad_op.type, "decayed_adagrad")
+
+        # Check accumulators
+        accumulators = decayed_adagrad_optimizer.get_accumulators()
+        self.assertEqual(len(accumulators), 1)
+        self.assertTrue(
+            decayed_adagrad_optimizer.get_moment_str() in accumulators)
+        moment_acc = accumulators[decayed_adagrad_optimizer.get_moment_str()]
+        self.assertEqual(len(moment_acc), 1)
+        self.assertTrue(mul_x.name in moment_acc)
+
+        # Check init_program
+        init_ops = init_program.global_block().ops
+        self.assertEqual(len(init_ops), 2)
+        self.assertEqual(init_ops[0].type, "fill_constant")
+        self.assertAlmostEqual(init_ops[0].attr('value'), learning_rate)
+        self.assertEqual(init_ops[1].type, "fill_constant")
+        self.assertAlmostEqual(init_ops[1].attr('value'), 0.0)
+
+
 if __name__ == '__main__':
     unittest.main()