提交 d2b791a0 编写于 作者: Q qiaolongfei

add SGD and momentum optimizer doc

上级 16a0f746
......@@ -28,8 +28,8 @@ from contextlib import contextmanager
__all__ = [
'SGD', 'Momentum', 'Adagrad', 'Adam', 'Adamax', 'DecayedAdagrad',
'SGDOptimizer', 'MomentumOptimizer', 'AdagradOptimizer', 'AdamOptimizer',
'AdamaxOptimizer', 'DecayedAdagradOptimizer', 'RMSPropOptimizer',
'Adadelta', 'ModelAverage', 'Optimizer'
'AdamaxOptimizer', 'DecayedAdagradOptimizer', 'AdadeltaOptimizer',
'RMSPropOptimizer', 'Adadelta', 'ModelAverage', 'Optimizer'
]
......@@ -192,15 +192,15 @@ class Optimizer(object):
"""Add optimization operators to update gradients to variables.
Args:
loss: the target that this optimization is for.
parameters_and_grads: a list of (variable, gradient) pair to update.
loss(Variable): the target that this optimization is for.
parameters_and_grads(list(tuple(Variable, Variable))):
a list of (variable, gradient) pairs to update.
Returns:
return_op_list: a list of operators that will complete one step of
optimization. This will include parameter update ops, global step
update ops and any other custom ops required by subclasses to manage
their internal state.
:param startup_program:
"""
# This is a default implementation of create_optimization_pass that
# can be shared by most optimizers. This implementation assumes that
......@@ -268,7 +268,22 @@ class Optimizer(object):
class SGDOptimizer(Optimizer):
""" Simple SGD optimizer without any state.
"""
Optimizer of the stochastic gradient descent algorithm.
.. math::
param\_out = param - learning\_rate * grad
Args:
learning_rate (float|Variable): the learning rate used to update parameters. \
Can be a float value or a Variable with one float value as data element.
Examples:
.. code-block:: python
sgd_optimizer = SGDOptimizer(learning_rate=0.2)
sgd_optimizer.minimize(cost)
"""
def __init__(self, learning_rate, **kwargs):
......@@ -294,7 +309,37 @@ class SGDOptimizer(Optimizer):
class MomentumOptimizer(Optimizer):
"""Simple Momentum optimizer with velocity state
"""
Simple Momentum optimizer with velocity state
This optimizer has a flag for Nesterov Momentum.
The update equations are as follows:
.. math::
& velocity = mu * velocity + gradient
& if (use\_nesterov):
& param = param - (gradient + mu * velocity) * learning\_rate
& else:
& param = param - learning\_rate * velocity
Args:
learning_rate (float|Variable): the learning rate used to update parameters. \
Can be a float value or a Variable with one float value as data element.
momentum (float): momentum factor
use_nesterov (bool): enables Nesterov momentum
Examples:
.. code-block:: python
optimizer = MomentumOptimizer(learning_rate=0.2, momentum=0.1)
optimizer.minimize(cost)
"""
_velocity_acc_str = "velocity"
......@@ -614,6 +659,7 @@ class DecayedAdagradOptimizer(Optimizer):
class AdadeltaOptimizer(Optimizer):
"""
**Adadelta Optimizer**
Simple Adadelta optimizer with average squared grad state and
average squared update state.
For the details of Adadelta, please refer to this
......@@ -703,7 +749,7 @@ class RMSPropOptimizer(Optimizer):
.. math::
r(w, t) & = \\rho r(w, t-1) + (1 - \\rho)(\\nabla Q_{i}(w))^2 \\\\
r(w, t) & = \\rho r(w, t-1) + (1 - \\rho)(\\nabla Q_{i}(w))^2 \\
w & = w - \\frac{\\eta} {\\sqrt{r(w,t) + \\epsilon}} \\nabla Q_{i}(w)
......@@ -844,7 +890,9 @@ class ModelAverage(Optimizer):
max_average_window: The maximum size of average window.
Examples:
...
.. code-block:: python
optimizer = fluid.optimizer.Momentum()
_, params_grads = optimizer.minimize(cost)
model_average = fluid.optimizer.ModelAverage(params_grads, 0.15,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册