Unverified commit 4a5ce4fe, authored by Zeng Jinle, committed by GitHub

Add AdadeltaOptimizer doc (#19875)

* add AdadeltaOptimizer doc, test=develop

* refine doc,test=develop

* follow lanxiang's comments, test=develop, test=document_fix
Parent commit 7912e6ca
......@@ -968,7 +968,7 @@ paddle.fluid.optimizer.RMSPropOptimizer.backward (ArgSpec(args=['self', 'loss',
paddle.fluid.optimizer.RMSPropOptimizer.get_opti_var_name_list (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.optimizer.RMSPropOptimizer.load (ArgSpec(args=['self', 'stat_dict'], varargs=None, keywords=None, defaults=None), ('document', '649a92cf7f1ea28666fd00c4ea01acde'))
paddle.fluid.optimizer.RMSPropOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'grad_clip'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'b15cffad0903fc81af77a0580ceb2a9b'))
paddle.fluid.optimizer.AdadeltaOptimizer ('paddle.fluid.optimizer.AdadeltaOptimizer', ('document', 'b5e33fa8aca6cfbcaebfc6cd7742908a'))
paddle.fluid.optimizer.AdadeltaOptimizer ('paddle.fluid.optimizer.AdadeltaOptimizer', ('document', '3f1c5385519a3674c18c3a1ab34ac04f'))
paddle.fluid.optimizer.AdadeltaOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'epsilon', 'rho', 'regularization', 'name'], varargs=None, keywords=None, defaults=(1e-06, 0.95, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.optimizer.AdadeltaOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', '80ea99c9af7ef5fac7e57fb302103610'))
paddle.fluid.optimizer.AdadeltaOptimizer.apply_optimize (ArgSpec(args=['self', 'loss', 'startup_program', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', '5c46d1926a40f1f873ffe9f37ac89dae'))
......
......@@ -43,9 +43,10 @@ __all__ = [
'Ftrl', 'SGDOptimizer', 'MomentumOptimizer', 'AdagradOptimizer',
'AdamOptimizer', 'AdamaxOptimizer', 'DpsgdOptimizer',
'DecayedAdagradOptimizer', 'RMSPropOptimizer', 'FtrlOptimizer', 'Adadelta',
'AdadeltaOptimizer', 'ModelAverage', 'LarsMomentum',
'LarsMomentumOptimizer', 'DGCMomentumOptimizer', 'LambOptimizer',
'ExponentialMovingAverage', 'PipelineOptimizer', 'LookaheadOptimizer',
'RecomputeOptimizer'
]
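With 'AdadeltaOptimizer' now exported through __all__, the class can be constructed under either its full name or the pre-existing 'Adadelta' shorthand. A minimal sketch, assuming a PaddlePaddle 1.6-era install in which Adadelta is an alias of AdadeltaOptimizer; the keyword arguments mirror the ArgSpec above:

.. code-block:: python

    import paddle.fluid as fluid

    # Same hyper-parameters as the docstring example below.
    opt_full = fluid.optimizer.AdadeltaOptimizer(
        learning_rate=0.0003, epsilon=1.0e-6, rho=0.95)
    opt_alias = fluid.optimizer.Adadelta(
        learning_rate=0.0003, epsilon=1.0e-6, rho=0.95)

    # Expected to hold if Adadelta simply aliases AdadeltaOptimizer (an assumption here).
    assert type(opt_full) is type(opt_alias)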
......@@ -1778,39 +1779,42 @@ class DecayedAdagradOptimizer(Optimizer):
class AdadeltaOptimizer(Optimizer):
"""
**Adadelta Optimizer**

**NOTES: This API does not support sparse parameter optimization.**

Simple Adadelta optimizer with an average squared gradient state and an
average squared update state. For details, please refer to
`ADADELTA: AN ADAPTIVE LEARNING RATE METHOD <https://arxiv.org/abs/1212.5701>`_.

.. math::

    E(g_t^2) &= \rho * E(g_{t-1}^2) + (1-\rho) * g^2 \\
    learning\_rate &= \sqrt{ ( E(dx_{t-1}^2) + \epsilon ) / ( E(g_t^2) + \epsilon ) } \\
    E(dx_t^2) &= \rho * E(dx_{t-1}^2) + (1-\rho) * (-g * learning\_rate)^2

Args:
    learning_rate (float|Variable): the global learning rate.
    epsilon (float): a small float value for numerical stability. Default 1.0e-6.
    rho (float): a float value indicating the decay rate. Default 0.95.
    regularization (WeightDecayRegularizer, optional): a regularizer, such as
        fluid.regularizer.L2DecayRegularizer. Default None, meaning no regularization.
    name (str, optional): an optional name prefix for debugging. Default None.

Examples:
    .. code-block:: python

        import paddle.fluid as fluid

        image = fluid.layers.data(name='image', shape=[28], dtype='float32')
        fc = fluid.layers.fc(image, size=10)
        cost = fluid.layers.reduce_mean(fc)
        optimizer = fluid.optimizer.Adadelta(
            learning_rate=0.0003, epsilon=1.0e-6, rho=0.95)

        # optimizer_ops is a list of the optimizer operators that update the parameters;
        # params_grads is a list of (param, param_grad) pairs, where param is a
        # parameter and param_grad is the gradient variable of param.
        optimizer_ops, params_grads = optimizer.minimize(cost)
"""
_avg_squared_grad_acc_str = "_avg_squared_grad"
......
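For readers who want to sanity-check the equations in the new docstring, here is a minimal NumPy sketch of one Adadelta update step. It follows the formulas above (Zeiler's ADADELTA paper); the helper name adadelta_step is illustrative, and how Paddle's C++ kernel folds in the global learning_rate passed to the constructor is not reproduced here.

.. code-block:: python

    import numpy as np

    def adadelta_step(param, grad, avg_squared_grad, avg_squared_update,
                      rho=0.95, epsilon=1.0e-6):
        # E(g_t^2) = rho * E(g_{t-1}^2) + (1 - rho) * g^2
        avg_squared_grad = rho * avg_squared_grad + (1 - rho) * grad ** 2
        # adaptive step size: sqrt((E(dx_{t-1}^2) + eps) / (E(g_t^2) + eps))
        scale = np.sqrt((avg_squared_update + epsilon) /
                        (avg_squared_grad + epsilon))
        update = -scale * grad
        # E(dx_t^2) = rho * E(dx_{t-1}^2) + (1 - rho) * update^2
        avg_squared_update = rho * avg_squared_update + (1 - rho) * update ** 2
        return param + update, avg_squared_grad, avg_squared_update

    # Toy usage: one step on a scalar parameter with zero-initialized states.
    p, g = np.array(1.0), np.array(0.5)
    p, sq_g, sq_dx = adadelta_step(p, g, np.zeros(()), np.zeros(()))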