diff --git a/python/paddle/incubate/optimizer/functional/bfgs.py b/python/paddle/incubate/optimizer/functional/bfgs.py
index 52e1d6be790501c3207b3603ecff13a6a5ff0f28..9d98460e03c3ca8b06ef7fc4518c96ca9b112556 100644
--- a/python/paddle/incubate/optimizer/functional/bfgs.py
+++ b/python/paddle/incubate/optimizer/functional/bfgs.py
@@ -79,20 +79,48 @@ def minimize_bfgs(
     Examples:
         .. code-block:: python
+            :name: code-example1
 
+            # Example 1: 1-D parameters
             import paddle
+            # Randomly simulate a batch of input data
+            inputs = paddle.normal(shape=(100, 1))
+            labels = inputs * 2.0
+            # Define the loss function
+            def loss(w):
+                y = w * inputs
+                return paddle.nn.functional.square_error_cost(y, labels).mean()
+            # Initialize the weight parameter
+            w = paddle.normal(shape=(1,))
+            # Iteratively call BFGS to find the weight that minimizes the loss, and update the parameter
+            for epoch in range(0, 10):
+                # minimize_bfgs returns a tuple; the third element is the optimized weight
+                w_update = paddle.incubate.optimizer.functional.minimize_bfgs(loss, w)[2]
+                # Use paddle.assign to update the parameter in place
+                paddle.assign(w_update, w)
 
-            def func(x):
-                return paddle.dot(x, x)
-
-            x0 = paddle.to_tensor([1.3, 2.7])
-            results = paddle.incubate.optimizer.functional.minimize_bfgs(func, x0)
-            print("is_converge: ", results[0])
-            print("the minimum of func is: ", results[2])
-            # is_converge: is_converge: Tensor(shape=[1], dtype=bool, place=Place(gpu:0), stop_gradient=True,
-            #        [True])
-            # the minimum of func is: Tensor(shape=[2], dtype=float32, place=Place(gpu:0), stop_gradient=True,
-            #        [0., 0.])
+        .. code-block:: python
+            :name: code-example2
+
+            # Example 2: multidimensional parameters
+            import paddle
+            def flatten(x):
+                return x.flatten()
+            def unflatten(x):
+                return x.reshape((2, 2))
+            # Assume the network parameter has more than one dimension
+            def net(x):
+                assert len(x.shape) > 1
+                return x.square().mean()
+            # The function to be optimized takes the flattened parameter
+            def bfgs_f(flatten_x):
+                return net(unflatten(flatten_x))
+            x = paddle.rand([2, 2])
+            for i in range(0, 10):
+                # Flatten x before using minimize_bfgs
+                x_update = paddle.incubate.optimizer.functional.minimize_bfgs(bfgs_f, flatten(x))[2]
+                # Unflatten x_update, then update the parameter in place
+                paddle.assign(unflatten(x_update), x)
 
     """
     if dtype not in ['float32', 'float64']:
diff --git a/python/paddle/incubate/optimizer/functional/lbfgs.py b/python/paddle/incubate/optimizer/functional/lbfgs.py
index 06d8ba748c018987160ae1ea996cef1559e88b97..af30efe44a8daa733b249a5505708cdea6bcce45 100644
--- a/python/paddle/incubate/optimizer/functional/lbfgs.py
+++ b/python/paddle/incubate/optimizer/functional/lbfgs.py
@@ -80,20 +80,49 @@ def minimize_lbfgs(
     Examples:
         .. code-block:: python
+            :name: code-example1
 
+            # Example 1: 1-D parameters
             import paddle
+            # Randomly simulate a batch of input data
+            inputs = paddle.normal(shape=(100, 1))
+            labels = inputs * 2.0
+            # Define the loss function
+            def loss(w):
+                y = w * inputs
+                return paddle.nn.functional.square_error_cost(y, labels).mean()
+            # Initialize the weight parameter
+            w = paddle.normal(shape=(1,))
+            # Iteratively call L-BFGS to find the weight that minimizes the loss, and update the parameter
+            for epoch in range(0, 10):
+                # minimize_lbfgs returns a tuple; the third element is the optimized weight
+                w_update = paddle.incubate.optimizer.functional.minimize_lbfgs(loss, w)[2]
+                # Use paddle.assign to update the parameter in place
+                paddle.assign(w_update, w)
+
+        .. code-block:: python
+            :name: code-example2
+
+            # Example 2: multidimensional parameters
+            import paddle
+            def flatten(x):
+                return x.flatten()
+            def unflatten(x):
+                return x.reshape((2, 2))
+            # Assume the network parameter has more than one dimension
+            def net(x):
+                assert len(x.shape) > 1
+                return x.square().mean()
+            # The function to be optimized takes the flattened parameter
+            def lbfgs_f(flatten_x):
+                return net(unflatten(flatten_x))
+            x = paddle.rand([2, 2])
+            for i in range(0, 10):
+                # Flatten x before using minimize_lbfgs
+                x_update = paddle.incubate.optimizer.functional.minimize_lbfgs(lbfgs_f, flatten(x))[2]
+                # Unflatten x_update, then update the parameter in place
+                paddle.assign(unflatten(x_update), x)
 
-            def func(x):
-                return paddle.dot(x, x)
-
-            x0 = paddle.to_tensor([1.3, 2.7])
-            results = paddle.incubate.optimizer.functional.minimize_lbfgs(func, x0)
-            print("is_converge: ", results[0])
-            print("the minimum of func is: ", results[2])
-            # is_converge: is_converge: Tensor(shape=[1], dtype=bool, place=Place(gpu:0), stop_gradient=True,
-            #        [True])
-            # the minimum of func is: Tensor(shape=[2], dtype=float32, place=Place(gpu:0), stop_gradient=True,
-            #        [0., 0.])
 
     """
     if dtype not in ['float32', 'float64']:
         raise ValueError(
diff --git a/python/paddle/optimizer/lr.py b/python/paddle/optimizer/lr.py
index 580c545ef34397e64c3d123b353e51e98b0d288a..e628509e52afc1b94ce3171b22eeb15a10581501 100644
--- a/python/paddle/optimizer/lr.py
+++ b/python/paddle/optimizer/lr.py
@@ -125,6 +125,19 @@ class LRScheduler:
         Returns:
            None
 
+        Examples:
+            .. code-block:: python
+
+                import paddle
+                value = paddle.arange(26, dtype='float32')
+                a = paddle.reshape(value, [2, 13])
+                linear = paddle.nn.Linear(13, 5)
+                adadelta = paddle.optimizer.Adadelta(learning_rate=0.0003, epsilon=1e-06, rho=0.95,
+                                                     parameters=linear.parameters())
+                out = linear(a)
+                out.backward()
+                adadelta.step()
+                adadelta.clear_grad()
 
         Examples:
             .. code-block:: python