diff --git a/python/paddle/optimizer/rmsprop.py b/python/paddle/optimizer/rmsprop.py
index 7efcd51af42629c2fe739b3804f90c0660bb0005..3bab1f25f00f231ebe1b62beee556f26014b5477 100644
--- a/python/paddle/optimizer/rmsprop.py
+++ b/python/paddle/optimizer/rmsprop.py
@@ -86,60 +86,61 @@ class RMSProp(Optimizer):
         parameters (list|tuple, optional): List/Tuple of ``Tensor`` to update to minimize ``loss``.
             This parameter is required in dygraph mode. And you can specify different options for
             different parameter groups such as the learning rate, weight decay, etc,
-            then the parameters are list of dict. Note that the learning_rate in paramter groups
+            then the parameters are list of dict. Note that the learning_rate in parameter groups
             represents the scale of base learning_rate.
             The default value is None in static graph mode, at this time all parameters will be updated.
         weight_decay (float|WeightDecayRegularizer, optional): The strategy of regularization.
-            It canbe a float value as coeff of L2 regularization or \
+            It can be a float value as coeff of L2 regularization or \
             :ref:`api_fluid_regularizer_L1Decay`, :ref:`api_fluid_regularizer_L2Decay`.
             If a parameter has set regularizer using :ref:`api_fluid_ParamAttr` already,
             the regularization setting here in optimizer will be ignored for this parameter.
             Otherwise, the regularization setting here in optimizer will take effect.
             Default None, meaning there is no regularization.
-        grad_clip (GradientClipBase, optional): Gradient cliping strategy, it's an instance of
-            some derived class of ``GradientClipBase`` . There are three cliping strategies
+        grad_clip (GradientClipBase, optional): Gradient clipping strategy, it's an instance of
+            some derived class of ``GradientClipBase`` . There are three clipping strategies
             ( :ref:`api_fluid_clip_GradientClipByGlobalNorm` , :ref:`api_fluid_clip_GradientClipByNorm` ,
             :ref:`api_fluid_clip_GradientClipByValue` ). Default None, meaning there is no gradient clipping.
         name (str, optional): This parameter is used by developers to print debugging information.
             For details, please refer to :ref:`api_guide_Name`. Default is None.

     Examples:
-        .. code-block:: python
-
-            import paddle
-
-            inp = paddle.rand([10,10], dtype="float32")
-            linear = paddle.nn.Linear(10, 10)
-            out = linear(inp)
-            loss = paddle.mean(out)
-
-            rmsprop = paddle.optimizer.RMSProp(learning_rate=0.1,
-                             parameters=linear.parameters(),
-                             weight_decay=0.01)
-            out.backward()
-            rmsprop.step()
-            rmsprop.clear_grad()
-
-            #Note that the learning_rate of linear_2 is 0.01.
-            linear_1 = paddle.nn.Linear(10, 10)
-            linear_2 = paddle.nn.Linear(10, 10)
-            inp = paddle.uniform(shape=[10, 10], min=-0.1, max=0.1)
-            out = linear_1(inp)
-            out = linear_2(out)
-            loss = paddle.mean(out)
-            rmsprop = paddle.optimizer.RMSProp(
-                learning_rate=0.1,
-                parameters=[{
-                    'params': linear_1.parameters()
-                }, {
-                    'params': linear_2.parameters(),
-                    'weight_decay': 0.001,
-                    'learning_rate': 0.1
-                }],
-                weight_decay=0.01)
-            out.backward()
-            rmsprop.step()
-            rmsprop.clear_grad()
+        .. code-block:: python
+
+            >>> import paddle
+
+            >>> inp = paddle.rand([10,10], dtype="float32")
+            >>> linear = paddle.nn.Linear(10, 10)
+            >>> out = linear(inp)
+            >>> loss = paddle.mean(out)
+
+            >>> rmsprop = paddle.optimizer.RMSProp(learning_rate=0.1,
+            ...                          parameters=linear.parameters(),
+            ...                          weight_decay=0.01)
+            >>> out.backward()
+            >>> rmsprop.step()
+            >>> rmsprop.clear_grad()
+
+            >>> # Note that the learning_rate of linear_2 is 0.01.
+            >>> linear_1 = paddle.nn.Linear(10, 10)
+            >>> linear_2 = paddle.nn.Linear(10, 10)
+            >>> inp = paddle.uniform(shape=[10, 10], min=-0.1, max=0.1)
+            >>> out = linear_1(inp)
+            >>> out = linear_2(out)
+            >>> loss = paddle.mean(out)
+            >>> rmsprop = paddle.optimizer.RMSProp(
+            ...     learning_rate=0.1,
+            ...     parameters=[{
+            ...         'params': linear_1.parameters()
+            ...     }, {
+            ...         'params': linear_2.parameters(),
+            ...         'weight_decay': 0.001,
+            ...         'learning_rate': 0.1
+            ...     }],
+            ...     weight_decay=0.01
+            ... )
+            >>> out.backward()
+            >>> rmsprop.step()
+            >>> rmsprop.clear_grad()
     """

     _momentum_acc_str = "momentum"
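The grad_clip argument described in the hunk above is documented but not exercised by the doctest examples. Below is a minimal sketch of how it plugs into RMSProp, assuming the Paddle 2.x ``paddle.nn.ClipGradByGlobalNorm`` API; the clip value and layer sizes are illustrative and not taken from the patch.

    # Sketch only: gradient clipping with RMSProp via grad_clip.
    # The clip_norm value and layer shapes are illustrative assumptions.
    import paddle

    linear = paddle.nn.Linear(10, 10)
    clip = paddle.nn.ClipGradByGlobalNorm(clip_norm=1.0)  # rescale grads so their global norm <= 1.0
    rmsprop = paddle.optimizer.RMSProp(
        learning_rate=0.1,
        parameters=linear.parameters(),
        grad_clip=clip,
    )

    out = linear(paddle.rand([10, 10], dtype="float32"))
    loss = paddle.mean(out)
    loss.backward()
    rmsprop.step()       # gradients are clipped before the RMSProp update
    rmsprop.clear_grad()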