diff --git a/python/paddle/optimizer/lbfgs.py b/python/paddle/optimizer/lbfgs.py
index 7afd59e65ade1873859be01401756a46331ea1de..a3efa46ea5ef5b9e41daeb38653e6babdedec78a 100644
--- a/python/paddle/optimizer/lbfgs.py
+++ b/python/paddle/optimizer/lbfgs.py
@@ -25,7 +25,7 @@ __all__ = []
 def _cubic_interpolate(x1, f1, g1, x2, f2, g2, bounds=None):
     r"""Cubic interpolation between (x1, f1, g1) and (x2, f2, g2).
 
-    Use two points and their gradient to determine a cubic function and get the minimun point
+    Use two points and their gradient to determine a cubic function and get the minimum point
     between them in the cubic curve.
 
     Reference:
@@ -38,7 +38,7 @@ def _cubic_interpolate(x1, f1, g1, x2, f2, g2, bounds=None):
         bounds: bounds of interpolation area
 
     Returns:
-        min_pos: the minimun point between the specified points in the cubic curve.
+        min_pos: the minimum point between the specified points in the cubic curve.
     """
     # Compute bounds of interpolation area
     if bounds is not None:
@@ -338,14 +338,14 @@ class LBFGS(Optimizer):
         parameters (list|tuple, optional): List/Tuple of ``Tensor`` names to update to minimize ``loss``. \
             This parameter is required in dygraph mode. The default value is None.
         weight_decay (float|WeightDecayRegularizer, optional): The strategy of regularization. \
-            It canbe a float value as coeff of L2 regularization or \
+            It can be a float value as coeff of L2 regularization or \
             :ref:`api_fluid_regularizer_L1Decay`, :ref:`api_fluid_regularizer_L2Decay`.
             If a parameter has set regularizer using :ref:`api_fluid_ParamAttr` already, \
             the regularization setting here in optimizer will be ignored for this parameter. \
             Otherwise, the regularization setting here in optimizer will take effect. \
             Default None, meaning there is no regularization.
-        grad_clip (GradientClipBase, optional): Gradient cliping strategy, it's an instance of \
-            some derived class of ``GradientClipBase`` . There are three cliping strategies \
+        grad_clip (GradientClipBase, optional): Gradient clipping strategy, it's an instance of \
+            some derived class of ``GradientClipBase`` . There are three clipping strategies \
             ( :ref:`api_fluid_clip_GradientClipByGlobalNorm` , :ref:`api_fluid_clip_GradientClipByNorm` , \
             :ref:`api_fluid_clip_GradientClipByValue` ). Default None, meaning there is no gradient clipping.
         name (str, optional): Normally there is no need for user to set this property.
@@ -358,45 +358,43 @@ class LBFGS(Optimizer):
     Examples:
         .. code-block:: python
 
-            import paddle
-            import numpy as np
-
-            paddle.disable_static()
-            np.random.seed(0)
-            np_w = np.random.rand(1).astype(np.float32)
-            np_x = np.random.rand(1).astype(np.float32)
-
-            inputs = [np.random.rand(1).astype(np.float32) for i in range(10)]
-            # y = 2x
-            targets = [2 * x for x in inputs]
-
-            class Net(paddle.nn.Layer):
-                def __init__(self):
-                    super().__init__()
-                    w = paddle.to_tensor(np_w)
-                    self.w = paddle.create_parameter(shape=w.shape, dtype=w.dtype, default_initializer=paddle.nn.initializer.Assign(w))
-
-                def forward(self, x):
-                    return self.w * x
-
-            net = Net()
-            opt = paddle.optimizer.LBFGS(learning_rate=1, max_iter=1, max_eval=None, tolerance_grad=1e-07, tolerance_change=1e-09, history_size=100, line_search_fn='strong_wolfe', parameters=net.parameters())
-            def train_step(inputs, targets):
-                def closure():
-                    outputs = net(inputs)
-                    loss = paddle.nn.functional.mse_loss(outputs, targets)
-                    print('loss: ', loss.item())
-                    opt.clear_grad()
-                    loss.backward()
-                    return loss
-                opt.step(closure)
-
-
-            for input, target in zip(inputs, targets):
-                input = paddle.to_tensor(input)
-                target = paddle.to_tensor(target)
-                train_step(input, target)
-
+            >>> import paddle
+            >>> import numpy as np
+
+            >>> paddle.disable_static()
+            >>> np.random.seed(0)
+            >>> np_w = np.random.rand(1).astype(np.float32)
+            >>> np_x = np.random.rand(1).astype(np.float32)
+
+            >>> inputs = [np.random.rand(1).astype(np.float32) for i in range(10)]
+            >>> # y = 2x
+            >>> targets = [2 * x for x in inputs]
+
+            >>> class Net(paddle.nn.Layer):
+            ...     def __init__(self):
+            ...         super().__init__()
+            ...         w = paddle.to_tensor(np_w)
+            ...         self.w = paddle.create_parameter(shape=w.shape, dtype=w.dtype, default_initializer=paddle.nn.initializer.Assign(w))
+            ...
+            ...     def forward(self, x):
+            ...         return self.w * x
+            ...
+            >>> net = Net()
+            >>> opt = paddle.optimizer.LBFGS(learning_rate=1, max_iter=1, max_eval=None, tolerance_grad=1e-07, tolerance_change=1e-09, history_size=100, line_search_fn='strong_wolfe', parameters=net.parameters())
+            >>> def train_step(inputs, targets):
+            ...     def closure():
+            ...         outputs = net(inputs)
+            ...         loss = paddle.nn.functional.mse_loss(outputs, targets)
+            ...         print('loss: ', loss.item())
+            ...         opt.clear_grad()
+            ...         loss.backward()
+            ...         return loss
+            ...     opt.step(closure)
+            ...
+            >>> for input, target in zip(inputs, targets):
+            ...     input = paddle.to_tensor(input)
+            ...     target = paddle.to_tensor(target)
+            ...     train_step(input, target)
     """
 
     def __init__(
@@ -458,41 +456,41 @@ class LBFGS(Optimizer):
         Examples:
             .. code-block:: python
 
-                import paddle
-
-                paddle.disable_static()
-
-                net = paddle.nn.Linear(10, 10)
-                opt = paddle.optimizer.LBFGS(
-                    learning_rate=1,
-                    max_iter=1,
-                    max_eval=None,
-                    tolerance_grad=1e-07,
-                    tolerance_change=1e-09,
-                    history_size=100,
-                    line_search_fn='strong_wolfe',
-                    parameters=net.parameters(),
-                )
-
-                def train_step(inputs, targets):
-                    def closure():
-                        outputs = net(inputs)
-                        loss = paddle.nn.functional.mse_loss(outputs, targets)
-                        opt.clear_grad()
-                        loss.backward()
-                        return loss
-
-                    opt.step(closure)
-
-                inputs = paddle.rand([10, 10], dtype="float32")
-                targets = paddle.to_tensor([2 * x for x in inputs])
-
-                n_iter = 0
-                while n_iter < 20:
-                    loss = train_step(inputs, targets)
-                    n_iter = opt.state_dict()["state"]["func_evals"]
-                    print("n_iter:", n_iter)
-
+                >>> import paddle
+
+                >>> paddle.disable_static()
+
+                >>> net = paddle.nn.Linear(10, 10)
+                >>> opt = paddle.optimizer.LBFGS(
+                ...     learning_rate=1,
+                ...     max_iter=1,
+                ...     max_eval=None,
+                ...     tolerance_grad=1e-07,
+                ...     tolerance_change=1e-09,
+                ...     history_size=100,
+                ...     line_search_fn='strong_wolfe',
+                ...     parameters=net.parameters(),
+                ... )
+
+                >>> def train_step(inputs, targets):
+                ...     def closure():
+                ...         outputs = net(inputs)
+                ...         loss = paddle.nn.functional.mse_loss(outputs, targets)
+                ...         opt.clear_grad()
+                ...         loss.backward()
+                ...         return loss
+                ...
+                ...     opt.step(closure)
+                ...
+                >>> inputs = paddle.rand([10, 10], dtype="float32")
+                >>> targets = paddle.to_tensor([2 * x for x in inputs])
+
+                >>> n_iter = 0
+                >>> while n_iter < 20:
+                ...     loss = train_step(inputs, targets)
+                ...     n_iter = opt.state_dict()["state"]["func_evals"]
+                ...     print("n_iter:", n_iter)
+                ...
         """
         packed_state = {}
 
@@ -559,34 +557,34 @@ class LBFGS(Optimizer):
         Examples:
             .. code-block:: python
 
-                import paddle
-
-                paddle.disable_static()
-
-                inputs = paddle.rand([10, 10], dtype="float32")
-                targets = paddle.to_tensor([2 * x for x in inputs])
-
-                net = paddle.nn.Linear(10, 10)
-                opt = paddle.optimizer.LBFGS(
-                    learning_rate=1,
-                    max_iter=1,
-                    max_eval=None,
-                    tolerance_grad=1e-07,
-                    tolerance_change=1e-09,
-                    history_size=100,
-                    line_search_fn='strong_wolfe',
-                    parameters=net.parameters(),
-                )
-
-                def closure():
-                    outputs = net(inputs)
-                    loss = paddle.nn.functional.mse_loss(outputs, targets)
-                    print("loss:", loss.item())
-                    opt.clear_grad()
-                    loss.backward()
-                    return loss
-
-                opt.step(closure)
+                >>> import paddle
+
+                >>> paddle.disable_static()
+
+                >>> inputs = paddle.rand([10, 10], dtype="float32")
+                >>> targets = paddle.to_tensor([2 * x for x in inputs])
+
+                >>> net = paddle.nn.Linear(10, 10)
+                >>> opt = paddle.optimizer.LBFGS(
+                ...     learning_rate=1,
+                ...     max_iter=1,
+                ...     max_eval=None,
+                ...     tolerance_grad=1e-07,
+                ...     tolerance_change=1e-09,
+                ...     history_size=100,
+                ...     line_search_fn='strong_wolfe',
+                ...     parameters=net.parameters(),
+                ... )
+
+                >>> def closure():
+                ...     outputs = net(inputs)
+                ...     loss = paddle.nn.functional.mse_loss(outputs, targets)
+                ...     print("loss:", loss.item())
+                ...     opt.clear_grad()
+                ...     loss.backward()
+                ...     return loss
+                ...
+                >>> opt.step(closure)
         """
         with paddle.no_grad():