diff --git a/doc/v2/api/fluid/optimizer.rst b/doc/v2/api/fluid/optimizer.rst
index 9b165f870459b4f9ef2efe24f5604a3fcb96f7f3..2f820595c35c2bccd6a5c8a20c60d796c04c8e97 100644
--- a/doc/v2/api/fluid/optimizer.rst
+++ b/doc/v2/api/fluid/optimizer.rst
@@ -47,3 +47,10 @@ DecayedAdagrad
     :members:
     :noindex:
 
+Adadelta
+--------------
+
+.. autoclass:: paddle.fluid.optimizer.AdadeltaOptimizer
+    :members:
+    :noindex:
+
diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py
index 75e1de00cf38a8e35fc0cb1f5a36459b7983bd63..e8623ee0dae820ece1874ed58ecf962b5f296d94 100644
--- a/python/paddle/fluid/optimizer.py
+++ b/python/paddle/fluid/optimizer.py
@@ -583,16 +583,44 @@ class DecayedAdagradOptimizer(Optimizer):
 
 
 class AdadeltaOptimizer(Optimizer):
-    """Simple Adadelta optimizer with average squared grad state and
+    """
+    **Adadelta Optimizer**
+    Simple Adadelta optimizer with average squared grad state and
     average squared update state.
+    For details of Adadelta, please refer to
+    `ADADELTA: AN ADAPTIVE LEARNING RATE METHOD
+    <https://arxiv.org/abs/1212.5701>`_.
+
+    .. math::
+
+        E(g_t^2) &= \\rho * E(g_{t-1}^2) + (1-\\rho) * g^2 \\\\
+        learning\\_rate &= \\sqrt{ ( E(dx_{t-1}^2) + \\epsilon ) / ( \\
+                          E(g_t^2) + \\epsilon ) } \\\\
+        E(dx_t^2) &= \\rho * E(dx_{t-1}^2) + (1-\\rho) * (-g*learning\\_rate)^2
+
+    Args:
+        learning_rate(float): global learning rate
+        rho(float): rho in the equations above
+        epsilon(float): epsilon in the equations above
+
+    Examples:
+        .. code-block:: python
+
+            optimizer = fluid.optimizer.Adadelta(
+                learning_rate=0.0003, epsilon=1.0e-6, rho=0.95)
+            _, params_grads = optimizer.minimize(cost)
     """
+
     _avg_squared_grad_acc_str = "_avg_squared_grad"
     _avg_squared_update_acc_str = "_avg_squared_update"
 
     def __init__(self, learning_rate, epsilon=1.0e-6, rho=0.95, **kwargs):
-        assert learning_rate is not None
-        assert epsilon is not None
-        assert rho is not None
+        if learning_rate is None:
+            raise ValueError("learning_rate is not set.")
+        if epsilon is None:
+            raise ValueError("epsilon is not set.")
+        if rho is None:
+            raise ValueError("rho is not set.")
         super(AdadeltaOptimizer, self).__init__(
             learning_rate=learning_rate, **kwargs)
         self.type = "adadelta"
@@ -600,14 +628,16 @@ class AdadeltaOptimizer(Optimizer):
         self._rho = rho
 
     def _create_accumulators(self, block, parameters):
-        assert isinstance(block, framework.Block)
+        if not isinstance(block, framework.Block):
+            raise TypeError("block is not an instance of framework.Block.")
 
         for p in parameters:
             self._add_accumulator(self._avg_squared_grad_acc_str, p)
             self._add_accumulator(self._avg_squared_update_acc_str, p)
 
     def _append_optimize_op(self, block, param_and_grad):
-        assert isinstance(block, framework.Block)
+        if not isinstance(block, framework.Block):
+            raise TypeError("block is not an instance of framework.Block.")
 
         avg_squared_grad_acc = self._get_accumulator(
             self._avg_squared_grad_acc_str, param_and_grad[0])
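
For reference, the update rule described by the docstring math above can be written as a short NumPy sketch. This is illustrative only and not part of the patch; the helper name ``adadelta_step`` and its arguments are hypothetical, mirroring the ``_avg_squared_grad`` and ``_avg_squared_update`` accumulators added in the diff.

.. code-block:: python

    import numpy as np

    def adadelta_step(param, grad, avg_squared_grad, avg_squared_update,
                      rho=0.95, epsilon=1.0e-6):
        # E(g_t^2) = rho * E(g_{t-1}^2) + (1 - rho) * g^2
        avg_squared_grad = rho * avg_squared_grad + (1 - rho) * grad ** 2
        # learning_rate = sqrt((E(dx_{t-1}^2) + eps) / (E(g_t^2) + eps))
        learning_rate = np.sqrt((avg_squared_update + epsilon) /
                                (avg_squared_grad + epsilon))
        update = -learning_rate * grad
        # E(dx_t^2) = rho * E(dx_{t-1}^2) + (1 - rho) * update^2
        avg_squared_update = rho * avg_squared_update + (1 - rho) * update ** 2
        return param + update, avg_squared_grad, avg_squared_update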