From 72847ad031cda087d28f806a830f7d5f5a785b63 Mon Sep 17 00:00:00 2001
From: wanghaoshuang
Date: Mon, 19 Mar 2018 22:45:55 +0800
Subject: [PATCH] Add python API for Adadelta optimizer.

---
 python/paddle/fluid/optimizer.py | 57 +++++++++++++++++++++++++++++++-
 1 file changed, 56 insertions(+), 1 deletion(-)

diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py
index 1c12d53e4f3..d104cc5cbdc 100644
--- a/python/paddle/fluid/optimizer.py
+++ b/python/paddle/fluid/optimizer.py
@@ -24,7 +24,9 @@ from layer_helper import LayerHelper
 from regularizer import append_regularization_ops
 from clip import append_gradient_clip_ops, error_clip_callback
 
-__all__ = ['SGD', 'Momentum', 'Adagrad', 'Adam', 'Adamax', 'DecayedAdagrad']
+__all__ = [
+    'SGD', 'Momentum', 'Adagrad', 'Adam', 'Adamax', 'DecayedAdagrad', 'Adadelta'
+]
 
 
 class Optimizer(object):
@@ -575,6 +577,58 @@ class DecayedAdagradOptimizer(Optimizer):
         return decayed_adagrad_op
 
 
+class AdadeltaOptimizer(Optimizer):
+    """Simple Adadelta optimizer with average squared grad state and
+    average squared update state.
+    """
+    _avg_squared_grad_acc_str = "_avg_squared_grad"
+    _avg_squared_update_acc_str = "_avg_squared_update"
+
+    def __init__(self, learning_rate, epsilon=1.0e-6, rho=0.95, **kwargs):
+        assert learning_rate is not None
+        assert epsilon is not None
+        assert rho is not None
+        super(AdadeltaOptimizer, self).__init__(
+            learning_rate=learning_rate, **kwargs)
+        self.type = "adadelta"
+        self._epsilon = epsilon
+        self._rho = rho
+
+    def _create_accumulators(self, block, parameters):
+        assert isinstance(block, framework.Block)
+
+        for p in parameters:
+            self._add_accumulator(self._avg_squared_grad_acc_str, p)
+            self._add_accumulator(self._avg_squared_update_acc_str, p)
+
+    def _append_optimize_op(self, block, param_and_grad):
+        assert isinstance(block, framework.Block)
+
+        avg_squared_grad_acc = self._get_accumulator(
+            self._avg_squared_grad_acc_str, param_and_grad[0])
+        avg_squared_update_acc = self._get_accumulator(
+            self._avg_squared_update_acc_str, param_and_grad[0])
+
+        # Create the adadelta optimizer op
+        adadelta_op = block.append_op(
+            type=self.type,
+            inputs={
+                "Param": param_and_grad[0],
+                "Grad": param_and_grad[1],
+                "AvgSquaredGrad": avg_squared_grad_acc,
+                "AvgSquaredUpdate": avg_squared_update_acc
+            },
+            outputs={
+                "ParamOut": param_and_grad[0],
+                "AvgSquaredGradOut": avg_squared_grad_acc,
+                "AvgSquaredUpdateOut": avg_squared_update_acc
+            },
+            attrs={"epsilon": self._epsilon,
+                   "rho": self._rho})
+
+        return adadelta_op
+
+
 # We short the class name, since users will use the optimizer with the package
 # name. The sample code:
 #
@@ -589,3 +643,4 @@ Adagrad = AdagradOptimizer
 Adam = AdamOptimizer
 Adamax = AdamaxOptimizer
 DecayedAdagrad = DecayedAdagradOptimizer
+Adadelta = AdadeltaOptimizer
--
GitLab
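
Reviewer note (not part of the patch): a minimal sketch of how the new Adadelta alias would be exercised once merged. It assumes the existing fluid layers API of this period and that the C++ "adadelta" operator is already registered; the toy regression network (x, y, y_predict, avg_cost) and the learning rate are illustrative only, not taken from this change.

import paddle.fluid as fluid

# Toy regression network; only the optimizer lines relate to this patch.
x = fluid.layers.data(name='x', shape=[13], dtype='float32')
y = fluid.layers.data(name='y', shape=[1], dtype='float32')
y_predict = fluid.layers.fc(input=x, size=1)
cost = fluid.layers.square_error_cost(input=y_predict, label=y)
avg_cost = fluid.layers.mean(x=cost)

# Shortened alias registered at the bottom of optimizer.py; epsilon and rho
# shown here are the defaults from AdadeltaOptimizer.__init__.
optimizer = fluid.optimizer.Adadelta(learning_rate=0.01, epsilon=1.0e-6, rho=0.95)
optimize_ops, params_grads = optimizer.minimize(avg_cost)

minimize() appends the per-parameter adadelta ops (plus the _avg_squared_grad and _avg_squared_update accumulators created in _create_accumulators) to the default program, after which the program can be run with an executor as with the other optimizers.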