Unverified commit e79133e8, authored by Candy2Tang, committed by GitHub

[xdoctest][task 109] Reformat example code with google style in python/paddle/optimizer/lr.py (#56225)

* [xdoctest][task 109] test=docs_preview

* fix whitespace test=docs_preview

* Apply suggestions from code review

* fix indent and legacy fluid apis

---------
Co-authored-by: Nyakku Shigure <sigure.qaq@gmail.com>
Parent b39f5f62
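The change this commit applies is mechanical: every docstring example that used to be a bare ".. code-block:: python" snippet is rewritten in the Google/xdoctest style, where each statement carries a ">>>" prompt and continuation or indented lines carry "...", so the snippets can be collected and checked by xdoctest. A minimal sketch of the target format (an illustrative function written for this note, not a line from the diff):

    def scaled(value, factor=0.5):
        """Scale ``value`` by ``factor``.

        Examples:
            .. code-block:: python

                >>> # statements take the ``>>>`` prompt, continuations take ``...``
                >>> total = sum(
                ...     scaled(v) for v in [1.0, 2.0, 3.0]
                ... )
                >>> print(total)
                3.0
        """
        return value * factor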
@@ -72,31 +72,31 @@ class LRScheduler:

        .. code-block:: python

            >>> import paddle
            >>> from paddle.optimizer.lr import LRScheduler
            >>> class StepDecay(LRScheduler):
            ...     def __init__(self,
            ...                 learning_rate,
            ...                 step_size,
            ...                 gamma=0.1,
            ...                 last_epoch=-1,
            ...                 verbose=False):
            ...         if not isinstance(step_size, int):
            ...             raise TypeError(
            ...                 "The type of 'step_size' must be 'int', but received %s." %
            ...                 type(step_size))
            ...         if gamma >= 1.0:
            ...             raise ValueError('gamma should be < 1.0.')
            ...
            ...         self.step_size = step_size
            ...         self.gamma = gamma
            ...         super().__init__(learning_rate, last_epoch, verbose)
            ...
            ...     def get_lr(self):
            ...         i = self.last_epoch // self.step_size
            ...         return self.base_lr * (self.gamma**i)

    """

    def __init__(self, learning_rate=0.1, last_epoch=-1, verbose=False):
@@ -136,29 +136,29 @@ class LRScheduler:

        Examples:
            .. code-block:: python

                >>> import paddle
                >>> value = paddle.arange(26, dtype='float32')
                >>> a = paddle.reshape(value, [2, 13])
                >>> linear = paddle.nn.Linear(13, 5)
                >>> adadelta = paddle.optimizer.Adadelta(learning_rate=0.0003, epsilon=1e-06, rho=0.95,
                ...                                      parameters = linear.parameters())
                >>> out = linear(a)
                >>> out.backward()
                >>> adadelta.step()
                >>> adadelta.clear_grad()
        """
        if epoch is None:
            self.last_epoch += 1
@@ -275,57 +275,57 @@ class NoamDecay(LRScheduler):

        .. code-block:: python
            :name: code-example1

            >>> # Example1: train on default dynamic graph mode
            >>> import paddle
            >>> import numpy as np
            >>> # train on default dynamic graph mode
            >>> linear = paddle.nn.Linear(10, 10)
            >>> scheduler = paddle.optimizer.lr.NoamDecay(d_model=0.01, warmup_steps=100, verbose=True)
            >>> sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
            >>> for epoch in range(20):
            ...     for batch_id in range(5):
            ...         x = paddle.uniform([10, 10])
            ...         out = linear(x)
            ...         loss = paddle.mean(out)
            ...         loss.backward()
            ...         sgd.step()
            ...         sgd.clear_gradients()
            ...         scheduler.step()    # If you update learning rate each step
            ...     # scheduler.step()      # If you update learning rate each epoch

        .. code-block:: python
            :name: code-example2

            >>> # Example2: train on static graph mode
            >>> import paddle
            >>> import numpy as np
            >>> paddle.enable_static()
            >>> main_prog = paddle.static.Program()
            >>> start_prog = paddle.static.Program()
            >>> with paddle.static.program_guard(main_prog, start_prog):
            ...     x = paddle.static.data(name='x', shape=[None, 4, 5])
            ...     y = paddle.static.data(name='y', shape=[None, 4, 5])
            ...     z = paddle.static.nn.fc(x, 100)
            ...     loss = paddle.mean(z)
            ...     scheduler = paddle.optimizer.lr.NoamDecay(d_model=0.01, warmup_steps=100, verbose=True)
            ...     sgd = paddle.optimizer.SGD(learning_rate=scheduler)
            ...     sgd.minimize(loss)
            ...
            >>> exe = paddle.static.Executor()
            >>> exe.run(start_prog)
            >>> for epoch in range(20):
            ...     for batch_id in range(5):
            ...         out = exe.run(
            ...             main_prog,
            ...             feed={
            ...                 'x': np.random.randn(3, 4, 5).astype('float32'),
            ...                 'y': np.random.randn(3, 4, 5).astype('float32')
            ...             },
            ...             fetch_list=loss.name)
            ...         scheduler.step()    # If you update learning rate each step
            ...     # scheduler.step()      # If you update learning rate each epoch

    """

    def __init__(
@@ -385,56 +385,56 @@ class PiecewiseDecay(LRScheduler):

        .. code-block:: python
            :name: code-example1

            >>> # Example1: train on default dynamic graph mode
            >>> import paddle
            >>> import numpy as np
            >>> # train on default dynamic graph mode
            >>> linear = paddle.nn.Linear(10, 10)
            >>> scheduler = paddle.optimizer.lr.PiecewiseDecay(boundaries=[3, 6, 9], values=[0.1, 0.2, 0.3, 0.4], verbose=True)
            >>> sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
            >>> for epoch in range(20):
            ...     for batch_id in range(5):
            ...         x = paddle.uniform([10, 10])
            ...         out = linear(x)
            ...         loss = paddle.mean(out)
            ...         loss.backward()
            ...         sgd.step()
            ...         sgd.clear_gradients()
            ...         scheduler.step()    # If you update learning rate each step
            ...     # scheduler.step()      # If you update learning rate each epoch

        .. code-block:: python
            :name: code-example2

            >>> # Example2: train on static graph mode
            >>> import paddle
            >>> import numpy as np
            >>> paddle.enable_static()
            >>> main_prog = paddle.static.Program()
            >>> start_prog = paddle.static.Program()
            >>> with paddle.static.program_guard(main_prog, start_prog):
            ...     x = paddle.static.data(name='x', shape=[None, 4, 5])
            ...     y = paddle.static.data(name='y', shape=[None, 4, 5])
            ...     z = paddle.static.nn.fc(x, 100)
            ...     loss = paddle.mean(z)
            ...     scheduler = paddle.optimizer.lr.PiecewiseDecay(boundaries=[3, 6, 9], values=[0.1, 0.2, 0.3, 0.4], verbose=True)
            ...     sgd = paddle.optimizer.SGD(learning_rate=scheduler)
            ...     sgd.minimize(loss)
            ...
            >>> exe = paddle.static.Executor()
            >>> exe.run(start_prog)
            >>> for epoch in range(20):
            ...     for batch_id in range(5):
            ...         out = exe.run(
            ...             main_prog,
            ...             feed={
            ...                 'x': np.random.randn(3, 4, 5).astype('float32'),
            ...                 'y': np.random.randn(3, 4, 5).astype('float32')
            ...             },
            ...             fetch_list=loss.name)
            ...         scheduler.step()    # If you update learning rate each step
            ...     # scheduler.step()      # If you update learning rate each epoch
    """

    def __init__(self, boundaries, values, last_epoch=-1, verbose=False):
@@ -482,54 +482,54 @@ class NaturalExpDecay(LRScheduler):

        .. code-block:: python
            :name: code-example1

            >>> # Example1: train on default dynamic graph mode
            >>> import paddle
            >>> import numpy as np
            >>> linear = paddle.nn.Linear(10, 10)
            >>> scheduler = paddle.optimizer.lr.NaturalExpDecay(learning_rate=0.5, gamma=0.1, verbose=True)
            >>> sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
            >>> for epoch in range(20):
            ...     for batch_id in range(5):
            ...         x = paddle.uniform([10, 10])
            ...         out = linear(x)
            ...         loss = paddle.mean(out)
            ...         loss.backward()
            ...         sgd.step()
            ...         sgd.clear_gradients()
            ...         scheduler.step()    # If you update learning rate each step
            ...     # scheduler.step()      # If you update learning rate each epoch

        .. code-block:: python
            :name: code-example2

            >>> # Example2: train on static graph mode
            >>> import paddle
            >>> import numpy as np
            >>> paddle.enable_static()
            >>> main_prog = paddle.static.Program()
            >>> start_prog = paddle.static.Program()
            >>> with paddle.static.program_guard(main_prog, start_prog):
            ...     x = paddle.static.data(name='x', shape=[None, 4, 5])
            ...     y = paddle.static.data(name='y', shape=[None, 4, 5])
            ...     z = paddle.static.nn.fc(x, 100)
            ...     loss = paddle.mean(z)
            ...     scheduler = paddle.optimizer.lr.NaturalExpDecay(learning_rate=0.5, gamma=0.1, verbose=True)
            ...     sgd = paddle.optimizer.SGD(learning_rate=scheduler)
            ...     sgd.minimize(loss)
            ...
            >>> exe = paddle.static.Executor()
            >>> exe.run(start_prog)
            >>> for epoch in range(20):
            ...     for batch_id in range(5):
            ...         out = exe.run(
            ...             main_prog,
            ...             feed={
            ...                 'x': np.random.randn(3, 4, 5).astype('float32'),
            ...                 'y': np.random.randn(3, 4, 5).astype('float32')
            ...             },
            ...             fetch_list=loss.name)
            ...         scheduler.step()    # If you update learning rate each step
            ...     # scheduler.step()      # If you update learning rate each epoch
    """

    def __init__(self, learning_rate, gamma, last_epoch=-1, verbose=False):
@@ -569,57 +569,57 @@ class InverseTimeDecay(LRScheduler):

        .. code-block:: python
            :name: code-example1

            >>> # Example1: train on default dynamic graph mode
            >>> import paddle
            >>> import numpy as np
            >>> # train on default dynamic graph mode
            >>> linear = paddle.nn.Linear(10, 10)
            >>> scheduler = paddle.optimizer.lr.InverseTimeDecay(learning_rate=0.5, gamma=0.1, verbose=True)
            >>> sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
            >>> for epoch in range(20):
            ...     for batch_id in range(5):
            ...         x = paddle.uniform([10, 10])
            ...         out = linear(x)
            ...         loss = paddle.mean(out)
            ...         loss.backward()
            ...         sgd.step()
            ...         sgd.clear_gradients()
            ...         scheduler.step()    # If you update learning rate each step
            ...     # scheduler.step()      # If you update learning rate each epoch

        .. code-block:: python
            :name: code-example2

            >>> # Example2: train on static graph mode
            >>> import paddle
            >>> import numpy as np
            >>> paddle.enable_static()
            >>> main_prog = paddle.static.Program()
            >>> start_prog = paddle.static.Program()
            >>> with paddle.static.program_guard(main_prog, start_prog):
            ...     x = paddle.static.data(name='x', shape=[None, 4, 5])
            ...     y = paddle.static.data(name='y', shape=[None, 4, 5])
            ...     z = paddle.static.nn.fc(x, 100)
            ...     loss = paddle.mean(z)
            ...     scheduler = paddle.optimizer.lr.InverseTimeDecay(learning_rate=0.5, gamma=0.1, verbose=True)
            ...     sgd = paddle.optimizer.SGD(learning_rate=scheduler)
            ...     sgd.minimize(loss)
            ...
            >>> exe = paddle.static.Executor()
            >>> exe.run(start_prog)
            >>> for epoch in range(20):
            ...     for batch_id in range(5):
            ...         out = exe.run(
            ...             main_prog,
            ...             feed={
            ...                 'x': np.random.randn(3, 4, 5).astype('float32'),
            ...                 'y': np.random.randn(3, 4, 5).astype('float32')
            ...             },
            ...             fetch_list=loss.name)
            ...         scheduler.step()    # If you update learning rate each step
            ...     # scheduler.step()      # If you update learning rate each epoch

    """

    def __init__(self, learning_rate, gamma, last_epoch=-1, verbose=False):
@@ -672,56 +672,56 @@ class PolynomialDecay(LRScheduler):

        .. code-block:: python
            :name: code-example1

            >>> # Example1: train on default dynamic graph mode
            >>> import paddle
            >>> import numpy as np
            >>> # train on default dynamic graph mode
            >>> linear = paddle.nn.Linear(10, 10)
            >>> scheduler = paddle.optimizer.lr.PolynomialDecay(learning_rate=0.5, decay_steps=20, verbose=True)
            >>> sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
            >>> for epoch in range(20):
            ...     for batch_id in range(5):
            ...         x = paddle.uniform([10, 10])
            ...         out = linear(x)
            ...         loss = paddle.mean(out)
            ...         loss.backward()
            ...         sgd.step()
            ...         sgd.clear_gradients()
            ...         scheduler.step()    # If you update learning rate each step
            ...     # scheduler.step()      # If you update learning rate each epoch

        .. code-block:: python
            :name: code-example2

            >>> # Example2: train on static graph mode
            >>> import paddle
            >>> import numpy as np
            >>> paddle.enable_static()
            >>> main_prog = paddle.static.Program()
            >>> start_prog = paddle.static.Program()
            >>> with paddle.static.program_guard(main_prog, start_prog):
            ...     x = paddle.static.data(name='x', shape=[None, 4, 5])
            ...     y = paddle.static.data(name='y', shape=[None, 4, 5])
            ...     z = paddle.static.nn.fc(x, 100)
            ...     loss = paddle.mean(z)
            ...     scheduler = paddle.optimizer.lr.PolynomialDecay(learning_rate=0.5, decay_steps=20, verbose=True)
            ...     sgd = paddle.optimizer.SGD(learning_rate=scheduler)
            ...     sgd.minimize(loss)
            ...
            >>> exe = paddle.static.Executor()
            >>> exe.run(start_prog)
            >>> for epoch in range(20):
            ...     for batch_id in range(5):
            ...         out = exe.run(
            ...             main_prog,
            ...             feed={
            ...                 'x': np.random.randn(3, 4, 5).astype('float32'),
            ...                 'y': np.random.randn(3, 4, 5).astype('float32')
            ...             },
            ...             fetch_list=loss.name)
            ...         scheduler.step()    # If you update learning rate each step
            ...     # scheduler.step()      # If you update learning rate each epoch
    """

    def __init__(
@@ -803,58 +803,58 @@ class LinearWarmup(LRScheduler):

        .. code-block:: python
            :name: code-example1

            >>> # Example1: train on default dynamic graph mode
            >>> import paddle
            >>> import numpy as np
            >>> # train on default dynamic graph mode
            >>> linear = paddle.nn.Linear(10, 10)
            >>> scheduler = paddle.optimizer.lr.LinearWarmup(
            ...     learning_rate=0.5, warmup_steps=20, start_lr=0, end_lr=0.5, verbose=True)
            >>> sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
            >>> for epoch in range(20):
            ...     for batch_id in range(5):
            ...         x = paddle.uniform([10, 10])
            ...         out = linear(x)
            ...         loss = paddle.mean(out)
            ...         loss.backward()
            ...         sgd.step()
            ...         sgd.clear_gradients()
            ...         scheduler.step()    # If you update learning rate each step
            ...     # scheduler.step()      # If you update learning rate each epoch

        .. code-block:: python
            :name: code-example2

            >>> # Example2: train on static graph mode
            >>> import paddle
            >>> import numpy as np
            >>> paddle.enable_static()
            >>> main_prog = paddle.static.Program()
            >>> start_prog = paddle.static.Program()
            >>> with paddle.static.program_guard(main_prog, start_prog):
            ...     x = paddle.static.data(name='x', shape=[None, 4, 5])
            ...     y = paddle.static.data(name='y', shape=[None, 4, 5])
            ...     z = paddle.static.nn.fc(x, 100)
            ...     loss = paddle.mean(z)
            ...     scheduler = paddle.optimizer.lr.LinearWarmup(
            ...         learning_rate=0.5, warmup_steps=20, start_lr=0, end_lr=0.5, verbose=True)
            ...     sgd = paddle.optimizer.SGD(learning_rate=scheduler)
            ...     sgd.minimize(loss)
            ...
            >>> exe = paddle.static.Executor()
            >>> exe.run(start_prog)
            >>> for epoch in range(20):
            ...     for batch_id in range(5):
            ...         out = exe.run(
            ...             main_prog,
            ...             feed={
            ...                 'x': np.random.randn(3, 4, 5).astype('float32'),
            ...                 'y': np.random.randn(3, 4, 5).astype('float32')
            ...             },
            ...             fetch_list=loss.name)
            ...         scheduler.step()    # If you update learning rate each step
            ...     # scheduler.step()      # If you update learning rate each epoch
    """

    def __init__(
@@ -943,56 +943,56 @@ class ExponentialDecay(LRScheduler):

        .. code-block:: python
            :name: code-example1

            >>> # Example1: train on default dynamic graph mode
            >>> import paddle
            >>> import numpy as np
            >>> # train on default dynamic graph mode
            >>> linear = paddle.nn.Linear(10, 10)
            >>> scheduler = paddle.optimizer.lr.ExponentialDecay(learning_rate=0.5, gamma=0.9, verbose=True)
            >>> sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
            >>> for epoch in range(20):
            ...     for batch_id in range(5):
            ...         x = paddle.uniform([10, 10])
            ...         out = linear(x)
            ...         loss = paddle.mean(out)
            ...         loss.backward()
            ...         sgd.step()
            ...         sgd.clear_gradients()
            ...         scheduler.step()    # If you update learning rate each step
            ...     # scheduler.step()      # If you update learning rate each epoch

        .. code-block:: python
            :name: code-example2

            >>> # Example2: train on static graph mode
            >>> import paddle
            >>> import numpy as np
            >>> paddle.enable_static()
            >>> main_prog = paddle.static.Program()
            >>> start_prog = paddle.static.Program()
            >>> with paddle.static.program_guard(main_prog, start_prog):
            ...     x = paddle.static.data(name='x', shape=[None, 4, 5])
            ...     y = paddle.static.data(name='y', shape=[None, 4, 5])
            ...     z = paddle.static.nn.fc(x, 100)
            ...     loss = paddle.mean(z)
            ...     scheduler = paddle.optimizer.lr.ExponentialDecay(learning_rate=0.5, gamma=0.9, verbose=True)
            ...     sgd = paddle.optimizer.SGD(learning_rate=scheduler)
            ...     sgd.minimize(loss)
            ...
            >>> exe = paddle.static.Executor()
            >>> exe.run(start_prog)
            >>> for epoch in range(20):
            ...     for batch_id in range(5):
            ...         out = exe.run(
            ...             main_prog,
            ...             feed={
            ...                 'x': np.random.randn(3, 4, 5).astype('float32'),
            ...                 'y': np.random.randn(3, 4, 5).astype('float32')
            ...             },
            ...             fetch_list=loss.name)
            ...         scheduler.step()    # If you update learning rate each step
            ...     # scheduler.step()      # If you update learning rate each epoch
    """

    def __init__(self, learning_rate, gamma, last_epoch=-1, verbose=False):
@@ -1041,56 +1041,56 @@ class MultiStepDecay(LRScheduler):

        .. code-block:: python
            :name: code-example1

            >>> # Example1: train on default dynamic graph mode
            >>> import paddle
            >>> import numpy as np
            >>> # train on default dynamic graph mode
            >>> linear = paddle.nn.Linear(10, 10)
            >>> scheduler = paddle.optimizer.lr.MultiStepDecay(learning_rate=0.5, milestones=[2, 4, 6], gamma=0.8, verbose=True)
            >>> sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
            >>> for epoch in range(20):
            ...     for batch_id in range(5):
            ...         x = paddle.uniform([10, 10])
            ...         out = linear(x)
            ...         loss = paddle.mean(out)
            ...         loss.backward()
            ...         sgd.step()
            ...         sgd.clear_gradients()
            ...         scheduler.step()    # If you update learning rate each step
            ...     # scheduler.step()      # If you update learning rate each epoch

        .. code-block:: python
            :name: code-example2

            >>> # Example2: train on static graph mode
            >>> import paddle
            >>> import numpy as np
            >>> paddle.enable_static()
            >>> main_prog = paddle.static.Program()
            >>> start_prog = paddle.static.Program()
            >>> with paddle.static.program_guard(main_prog, start_prog):
            ...     x = paddle.static.data(name='x', shape=[None, 4, 5])
            ...     y = paddle.static.data(name='y', shape=[None, 4, 5])
            ...     z = paddle.static.nn.fc(x, 100)
            ...     loss = paddle.mean(z)
            ...     scheduler = paddle.optimizer.lr.MultiStepDecay(learning_rate=0.5, milestones=[2, 4, 6], gamma=0.8, verbose=True)
            ...     sgd = paddle.optimizer.SGD(learning_rate=scheduler)
            ...     sgd.minimize(loss)
            ...
            >>> exe = paddle.static.Executor()
            >>> exe.run(start_prog)
            >>> for epoch in range(20):
            ...     for batch_id in range(5):
            ...         out = exe.run(
            ...             main_prog,
            ...             feed={
            ...                 'x': np.random.randn(3, 4, 5).astype('float32'),
            ...                 'y': np.random.randn(3, 4, 5).astype('float32')
            ...             },
            ...             fetch_list=loss.name)
            ...         scheduler.step()    # If you update learning rate each step
            ...     # scheduler.step()      # If you update learning rate each epoch
    """

    def __init__(
@@ -1155,56 +1155,56 @@ class StepDecay(LRScheduler):

        .. code-block:: python
            :name: code-example1

            >>> # Example1: train on default dynamic graph mode
            >>> import paddle
            >>> import numpy as np
            >>> # train on default dynamic graph mode
            >>> linear = paddle.nn.Linear(10, 10)
            >>> scheduler = paddle.optimizer.lr.StepDecay(learning_rate=0.5, step_size=5, gamma=0.8, verbose=True)
            >>> sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
            >>> for epoch in range(20):
            ...     for batch_id in range(5):
            ...         x = paddle.uniform([10, 10])
            ...         out = linear(x)
            ...         loss = paddle.mean(out)
            ...         loss.backward()
            ...         sgd.step()
            ...         sgd.clear_gradients()
            ...         scheduler.step()    # If you update learning rate each step
            ...     # scheduler.step()      # If you update learning rate each epoch

        .. code-block:: python
            :name: code-example2

            >>> # Example2: train on static graph mode
            >>> import paddle
            >>> import numpy as np
            >>> paddle.enable_static()
            >>> main_prog = paddle.static.Program()
            >>> start_prog = paddle.static.Program()
            >>> with paddle.static.program_guard(main_prog, start_prog):
            ...     x = paddle.static.data(name='x', shape=[None, 4, 5])
            ...     y = paddle.static.data(name='y', shape=[None, 4, 5])
            ...     z = paddle.static.nn.fc(x, 100)
            ...     loss = paddle.mean(z)
            ...     scheduler = paddle.optimizer.lr.StepDecay(learning_rate=0.5, step_size=5, gamma=0.8, verbose=True)
            ...     sgd = paddle.optimizer.SGD(learning_rate=scheduler)
            ...     sgd.minimize(loss)
            ...
            >>> exe = paddle.static.Executor()
            >>> exe.run(start_prog)
            >>> for epoch in range(20):
            ...     for batch_id in range(5):
            ...         out = exe.run(
            ...             main_prog,
            ...             feed={
            ...                 'x': np.random.randn(3, 4, 5).astype('float32'),
            ...                 'y': np.random.randn(3, 4, 5).astype('float32')
            ...             },
            ...             fetch_list=loss.name)
            ...         scheduler.step()    # If you update learning rate each step
            ...     # scheduler.step()      # If you update learning rate each epoch
    """

    def __init__(
@@ -1259,57 +1259,57 @@ class LambdaDecay(LRScheduler):

        .. code-block:: python
            :name: code-example1

            >>> # Example1: train on default dynamic graph mode
            >>> import paddle
            >>> import numpy as np
            >>> # train on default dynamic graph mode
            >>> linear = paddle.nn.Linear(10, 10)
            >>> scheduler = paddle.optimizer.lr.LambdaDecay(learning_rate=0.5, lr_lambda=lambda x:0.95**x, verbose=True)
            >>> sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
            >>> for epoch in range(20):
            ...     for batch_id in range(5):
            ...         x = paddle.uniform([10, 10])
            ...         out = linear(x)
            ...         loss = paddle.mean(out)
            ...         loss.backward()
            ...         sgd.step()
            ...         sgd.clear_gradients()
            ...         scheduler.step()    # If you update learning rate each step
            ...     # scheduler.step()      # If you update learning rate each epoch

        .. code-block:: python
            :name: code-example2

            >>> # Example2: train on static graph mode
            >>> import paddle
            >>> import numpy as np
            >>> paddle.enable_static()
            >>> main_prog = paddle.static.Program()
            >>> start_prog = paddle.static.Program()
            >>> with paddle.static.program_guard(main_prog, start_prog):
            ...     x = paddle.static.data(name='x', shape=[None, 4, 5])
            ...     y = paddle.static.data(name='y', shape=[None, 4, 5])
            ...     z = paddle.static.nn.fc(x, 100)
            ...     loss = paddle.mean(z)
            ...     scheduler = paddle.optimizer.lr.LambdaDecay(learning_rate=0.5, lr_lambda=lambda x:0.95**x, verbose=True)
            ...     sgd = paddle.optimizer.SGD(learning_rate=scheduler)
            ...     sgd.minimize(loss)
            ...
            >>> exe = paddle.static.Executor()
            >>> exe.run(start_prog)
            >>> for epoch in range(20):
            ...     for batch_id in range(5):
            ...         out = exe.run(
            ...             main_prog,
            ...             feed={
            ...                 'x': np.random.randn(3, 4, 5).astype('float32'),
            ...                 'y': np.random.randn(3, 4, 5).astype('float32')
            ...             },
            ...             fetch_list=loss.name)
            ...         scheduler.step()    # If you update learning rate each step
            ...     # scheduler.step()      # If you update learning rate each epoch

    """

    def __init__(self, learning_rate, lr_lambda, last_epoch=-1, verbose=False):
@@ -1367,57 +1367,57 @@ class ReduceOnPlateau(LRScheduler):

        .. code-block:: python
            :name: code-example1

            >>> # Example1: train on default dynamic graph mode
            >>> import paddle
            >>> import numpy as np
            >>> # train on default dynamic graph mode
            >>> linear = paddle.nn.Linear(10, 10)
            >>> scheduler = paddle.optimizer.lr.ReduceOnPlateau(learning_rate=1.0, factor=0.5, patience=5, verbose=True)
            >>> sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
            >>> for epoch in range(20):
            ...     for batch_id in range(5):
            ...         x = paddle.uniform([10, 10])
            ...         out = linear(x)
            ...         loss = paddle.mean(out)
            ...         loss.backward()
            ...         sgd.step()
            ...         sgd.clear_gradients()
            ...         scheduler.step(loss)    # If you update learning rate each step
            ...     # scheduler.step(loss)      # If you update learning rate each epoch

        .. code-block:: python
            :name: code-example2

            >>> # Example2: train on static graph mode
            >>> import paddle
            >>> import numpy as np
            >>> paddle.enable_static()
            >>> main_prog = paddle.static.Program()
            >>> start_prog = paddle.static.Program()
            >>> with paddle.static.program_guard(main_prog, start_prog):
            ...     x = paddle.static.data(name='x', shape=[None, 4, 5])
            ...     y = paddle.static.data(name='y', shape=[None, 4, 5])
            ...     z = paddle.static.nn.fc(x, 100)
            ...     loss = paddle.mean(z)
            ...     scheduler = paddle.optimizer.lr.ReduceOnPlateau(learning_rate=1.0, factor=0.5, patience=5, verbose=True)
            ...     sgd = paddle.optimizer.SGD(learning_rate=scheduler)
            ...     sgd.minimize(loss)
            ...
            >>> exe = paddle.static.Executor()
            >>> exe.run(start_prog)
            >>> for epoch in range(20):
            ...     for batch_id in range(5):
            ...         out = exe.run(
            ...             main_prog,
            ...             feed={
            ...                 'x': np.random.randn(3, 4, 5).astype('float32'),
            ...                 'y': np.random.randn(3, 4, 5).astype('float32')
            ...             },
            ...             fetch_list=loss.name)
            ...         scheduler.step(out[0])    # If you update learning rate each step
            ...     # scheduler.step(out[0])      # If you update learning rate each epoch

    """

    def __init__(
@@ -1598,56 +1598,56 @@ class CosineAnnealingDecay(LRScheduler):

        .. code-block:: python
            :name: code-example1

            >>> # Example1: train on default dynamic graph mode
            >>> import paddle
            >>> import numpy as np
            >>> # train on default dynamic graph mode
            >>> linear = paddle.nn.Linear(10, 10)
            >>> scheduler = paddle.optimizer.lr.CosineAnnealingDecay(learning_rate=0.5, T_max=10, verbose=True)
            >>> sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
            >>> for epoch in range(20):
            ...     for batch_id in range(5):
            ...         x = paddle.uniform([10, 10])
            ...         out = linear(x)
            ...         loss = paddle.mean(out)
            ...         loss.backward()
            ...         sgd.step()
            ...         sgd.clear_gradients()
            ...         scheduler.step()    # If you update learning rate each step
            ...     # scheduler.step()      # If you update learning rate each epoch

        .. code-block:: python
            :name: code-example2

            >>> # Example2: train on static graph mode
            >>> import paddle
            >>> import numpy as np
            >>> paddle.enable_static()
            >>> main_prog = paddle.static.Program()
            >>> start_prog = paddle.static.Program()
            >>> with paddle.static.program_guard(main_prog, start_prog):
            ...     x = paddle.static.data(name='x', shape=[None, 4, 5])
            ...     y = paddle.static.data(name='y', shape=[None, 4, 5])
            ...     z = paddle.static.nn.fc(x, 100)
            ...     loss = paddle.mean(z)
            ...     scheduler = paddle.optimizer.lr.CosineAnnealingDecay(learning_rate=0.5, T_max=10, verbose=True)
            ...     sgd = paddle.optimizer.SGD(learning_rate=scheduler)
            ...     sgd.minimize(loss)
            ...
            >>> exe = paddle.static.Executor()
            >>> exe.run(start_prog)
            >>> for epoch in range(20):
            ...     for batch_id in range(5):
            ...         out = exe.run(
            ...             main_prog,
            ...             feed={
            ...                 'x': np.random.randn(3, 4, 5).astype('float32'),
            ...                 'y': np.random.randn(3, 4, 5).astype('float32')
            ...             },
            ...             fetch_list=loss.name)
            ...         scheduler.step()    # If you update learning rate each step
            ...     # scheduler.step()      # If you update learning rate each epoch
    """

    def __init__(
@@ -1722,23 +1722,23 @@ class MultiplicativeDecay(LRScheduler):

        .. code-block:: python

            >>> import paddle
            >>> # train on default dynamic graph mode
            >>> linear = paddle.nn.Linear(10, 10)
            >>> scheduler = paddle.optimizer.lr.MultiplicativeDecay(learning_rate=0.5, lr_lambda=lambda x:0.95, verbose=True)
            >>> sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
            >>> for epoch in range(20):
            ...     for batch_id in range(5):
            ...         x = paddle.uniform([10, 10])
            ...         out = linear(x)
            ...         loss = paddle.mean(out)
            ...         loss.backward()
            ...         sgd.step()
            ...         sgd.clear_gradients()
            ...         scheduler.step()    # If you update learning rate each step
            ...     # scheduler.step()      # If you update learning rate each epoch

    """

    def __init__(self, learning_rate, lr_lambda, last_epoch=-1, verbose=False):
@@ -1803,55 +1803,55 @@ class OneCycleLR(LRScheduler):

        .. code-block:: python
            :name: code-example1

            >>> # Example1: train on default dynamic graph mode
            >>> import paddle
            >>> import numpy as np
            >>> # train on default dynamic graph mode
            >>> linear = paddle.nn.Linear(10, 10)
            >>> scheduler = paddle.optimizer.lr.OneCycleLR(max_learning_rate=1.0, total_steps=100, verbose=True)
            >>> sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
            >>> for epoch in range(5):
            ...     for batch_id in range(20):
            ...         x = paddle.uniform([10, 10])
            ...         out = linear(x)
            ...         loss = paddle.mean(out)
            ...         loss.backward()
            ...         sgd.step()
            ...         sgd.clear_gradients()
            ...         scheduler.step()    # You should update learning rate each step

        .. code-block:: python
            :name: code-example2

            >>> # Example2: train on static graph mode
            >>> import paddle
            >>> import numpy as np
            >>> paddle.enable_static()
            >>> main_prog = paddle.static.Program()
            >>> start_prog = paddle.static.Program()
            >>> with paddle.static.program_guard(main_prog, start_prog):
            ...     x = paddle.static.data(name='x', shape=[None, 4, 5])
            ...     y = paddle.static.data(name='y', shape=[None, 4, 5])
            ...     z = paddle.static.nn.fc(x, 100)
            ...     loss = paddle.mean(z)
            ...     scheduler = paddle.optimizer.lr.OneCycleLR(max_learning_rate=1.0, total_steps=100, verbose=True)
            ...     sgd = paddle.optimizer.SGD(learning_rate=scheduler)
            ...     sgd.minimize(loss)
            ...
            >>> exe = paddle.static.Executor()
            >>> exe.run(start_prog)
            >>> for epoch in range(5):
            ...     for batch_id in range(20):
            ...         out = exe.run(
            ...             main_prog,
            ...             feed={
            ...                 'x': np.random.randn(3, 4, 5).astype('float32'),
            ...                 'y': np.random.randn(3, 4, 5).astype('float32')
            ...             },
            ...             fetch_list=loss.name)
            ...         scheduler.step()    # You should update learning rate each step

    """

    def __init__(
@@ -2053,55 +2053,55 @@ class CyclicLR(LRScheduler):

        .. code-block:: python
            :name: code-example1

            >>> # Example1: train on default dynamic graph mode
            >>> import paddle
            >>> import numpy as np
            >>> # train on default dynamic graph mode
            >>> linear = paddle.nn.Linear(10, 10)
            >>> scheduler = paddle.optimizer.lr.CyclicLR(base_learning_rate=0.5, max_learning_rate=1.0, step_size_up=15, step_size_down=5, verbose=True)
            >>> sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
            >>> for epoch in range(5):
            ...     for batch_id in range(20):
            ...         x = paddle.uniform([10, 10])
            ...         out = linear(x)
            ...         loss = paddle.mean(out)
            ...         loss.backward()
            ...         sgd.step()
            ...         sgd.clear_gradients()
            ...         scheduler.step()    # You should update learning rate each step

        .. code-block:: python
            :name: code-example2

            >>> # Example2: train on static graph mode
            >>> import paddle
            >>> import numpy as np
            >>> paddle.enable_static()
            >>> main_prog = paddle.static.Program()
            >>> start_prog = paddle.static.Program()
            >>> with paddle.static.program_guard(main_prog, start_prog):
            ...     x = paddle.static.data(name='x', shape=[None, 4, 5])
            ...     y = paddle.static.data(name='y', shape=[None, 4, 5])
            ...     z = paddle.static.nn.fc(x, 100)
            ...     loss = paddle.mean(z)
            ...     scheduler = paddle.optimizer.lr.CyclicLR(base_learning_rate=0.5,
            ...         max_learning_rate=1.0, step_size_up=15, step_size_down=5, verbose=True)
            ...     sgd = paddle.optimizer.SGD(learning_rate=scheduler)
            ...     sgd.minimize(loss)
            ...
            >>> exe = paddle.static.Executor()
            >>> exe.run(start_prog)
            >>> for epoch in range(5):
            ...     for batch_id in range(20):
            ...         out = exe.run(
            ...             main_prog,
            ...             feed={
            ...                 'x': np.random.randn(3, 4, 5).astype('float32'),
            ...                 'y': np.random.randn(3, 4, 5).astype('float32')
            ...             },
            ...             fetch_list=loss.name)
            ...         scheduler.step()    # You should update learning rate each step
    """

    def __init__(
@@ -2256,10 +2256,10 @@ def autoincreased_step_counter(counter_name=None, begin=1, step=1):

    Examples:
        .. code-block:: python

            >>> import paddle
            >>> paddle.enable_static()
            >>> global_step = paddle.optimizer.lr.autoincreased_step_counter(
            ...     counter_name='@LR_DECAY_COUNTER@', begin=0, step=1)
    """
    helper = LayerHelper('global_step_counter')
    if counter_name is None:
...@@ -2305,42 +2305,39 @@ def noam_decay(d_model, warmup_steps, learning_rate=1.0): ...@@ -2305,42 +2305,39 @@ def noam_decay(d_model, warmup_steps, learning_rate=1.0):
.. code-block:: python .. code-block:: python
import paddle.fluid as fluid >>> import numpy as np
import numpy as np >>> # set hyper parameters
# set hyper parameters >>> base_lr = 0.01
base_lr = 0.01 >>> d_model = 2
d_model = 2 >>> current_steps = 20
current_steps = 20 >>> warmup_steps = 200
warmup_steps = 200 >>> # compute
# compute >>> lr_value = base_lr * np.power(d_model, -0.5) * np.min([
lr_value = base_lr * np.power(d_model, -0.5) * np.min([ ... np.power(current_steps, -0.5),
np.power(current_steps, -0.5), ... np.power(warmup_steps, -1.5) * current_steps])
np.power(warmup_steps, -1.5) * current_steps])
Please reference `attention is all you need <https://arxiv.org/pdf/1706.03762.pdf>`_.
Please reference `attention is all you need
<https://arxiv.org/pdf/1706.03762.pdf>`_.
Args: Args:
d_model(Variable): The dimensionality of the model's input and output. d_model(Variable): The dimensionality of the model's input and output.
warmup_steps(Variable): A hyperparameter giving the number of warm-up steps. warmup_steps(Variable): A hyperparameter giving the number of warm-up steps.
learning_rate(Variable|float|int): The initial learning rate. If the type learning_rate(Variable|float|int): The initial learning rate. If the type
is Variable, it is a tensor with shape [1] whose data type can be is Variable, it is a tensor with shape [1] whose data type can be
float32 or float64. It can also be set to a Python int. Default 1.0. float32 or float64. It can also be set to a Python int. Default 1.0.
Returns: Returns:
The decayed learning rate. The decayed learning rate.
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle >>> import paddle
warmup_steps = 100 >>> warmup_steps = 100
learning_rate = 0.01 >>> learning_rate = 0.01
lr = paddle.optimizer.lr.noam_decay( >>> lr = paddle.optimizer.lr.noam_decay(
1/(warmup_steps *(learning_rate ** 2)), ... 1/(warmup_steps *(learning_rate ** 2)),
warmup_steps, ... warmup_steps,
learning_rate) ... learning_rate)
""" """
with default_main_program()._lr_schedule_guard(): with default_main_program()._lr_schedule_guard():
if in_dygraph_mode(): if in_dygraph_mode():
...@@ -2369,6 +2366,8 @@ def exponential_decay(learning_rate, decay_steps, decay_rate, staircase=False): ...@@ -2369,6 +2366,8 @@ def exponential_decay(learning_rate, decay_steps, decay_rate, staircase=False):
The decayed learning rate is calculated as follows: The decayed learning rate is calculated as follows:
.. code-block:: text
>>> if staircase == True: >>> if staircase == True:
>>> decayed_learning_rate = learning_rate * decay_rate ^ floor(global_step / decay_steps) >>> decayed_learning_rate = learning_rate * decay_rate ^ floor(global_step / decay_steps)
>>> else: >>> else:
...@@ -2390,17 +2389,16 @@ def exponential_decay(learning_rate, decay_steps, decay_rate, staircase=False): ...@@ -2390,17 +2389,16 @@ def exponential_decay(learning_rate, decay_steps, decay_rate, staircase=False):
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle >>> import paddle
paddle.enable_static()
base_lr = 0.1
sgd_optimizer = fluid.optimizer.SGD(
learning_rate=paddle.optimizer.lr.exponential_decay(
learning_rate=base_lr,
decay_steps=10000,
decay_rate=0.5,
staircase=True))
>>> paddle.enable_static()
>>> base_lr = 0.1
>>> lr = paddle.optimizer.lr.exponential_decay(
... learning_rate=base_lr,
... decay_steps=10000,
... decay_rate=0.5,
... staircase=True
... )
""" """
with default_main_program()._lr_schedule_guard(): with default_main_program()._lr_schedule_guard():
if in_dygraph_mode(): if in_dygraph_mode():
...@@ -2428,6 +2426,8 @@ def natural_exp_decay(learning_rate, decay_steps, decay_rate, staircase=False): ...@@ -2428,6 +2426,8 @@ def natural_exp_decay(learning_rate, decay_steps, decay_rate, staircase=False):
The decayed learning rate is calculated as follows: The decayed learning rate is calculated as follows:
.. code-block:: text
>>> if not staircase: >>> if not staircase:
>>> decayed_learning_rate = learning_rate * exp(- decay_rate * (global_step / decay_steps)) >>> decayed_learning_rate = learning_rate * exp(- decay_rate * (global_step / decay_steps))
>>> else: >>> else:
...@@ -2449,18 +2449,16 @@ def natural_exp_decay(learning_rate, decay_steps, decay_rate, staircase=False): ...@@ -2449,18 +2449,16 @@ def natural_exp_decay(learning_rate, decay_steps, decay_rate, staircase=False):
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle.fluid as fluid >>> import paddle
import paddle
paddle.enable_static()
base_lr = 0.1
sgd_optimizer = fluid.optimizer.SGD(
learning_rate=paddle.optimizer.lr.natural_exp_decay(
learning_rate=base_lr,
decay_steps=10000,
decay_rate=0.5,
staircase=True))
>>> paddle.enable_static()
>>> base_lr = 0.1
>>> lr = paddle.optimizer.lr.natural_exp_decay(
... learning_rate=base_lr,
... decay_steps=10000,
... decay_rate=0.5,
... staircase=True
... )
""" """
with default_main_program()._lr_schedule_guard(): with default_main_program()._lr_schedule_guard():
if in_dygraph_mode(): if in_dygraph_mode():
...@@ -2479,7 +2477,6 @@ def natural_exp_decay(learning_rate, decay_steps, decay_rate, staircase=False): ...@@ -2479,7 +2477,6 @@ def natural_exp_decay(learning_rate, decay_steps, decay_rate, staircase=False):
def inverse_time_decay(learning_rate, decay_steps, decay_rate, staircase=False): def inverse_time_decay(learning_rate, decay_steps, decay_rate, staircase=False):
""" """
Applies inverse time decay to the initial learning rate. Applies inverse time decay to the initial learning rate.
When training a model, it is often recommended to lower the learning rate as the When training a model, it is often recommended to lower the learning rate as the
...@@ -2488,6 +2485,8 @@ def inverse_time_decay(learning_rate, decay_steps, decay_rate, staircase=False): ...@@ -2488,6 +2485,8 @@ def inverse_time_decay(learning_rate, decay_steps, decay_rate, staircase=False):
The decayed learning rate is calculated as follows: The decayed learning rate is calculated as follows:
.. code-block:: text
>>> if staircase == True: >>> if staircase == True:
>>> decayed_learning_rate = learning_rate / (1 + decay_rate * floor(global_step / decay_step)) >>> decayed_learning_rate = learning_rate / (1 + decay_rate * floor(global_step / decay_step))
>>> else: >>> else:
...@@ -2509,16 +2508,15 @@ def inverse_time_decay(learning_rate, decay_steps, decay_rate, staircase=False): ...@@ -2509,16 +2508,15 @@ def inverse_time_decay(learning_rate, decay_steps, decay_rate, staircase=False):
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle.fluid as fluid >>> import paddle
import paddle >>> paddle.enable_static()
paddle.enable_static() >>> base_lr = 0.1
base_lr = 0.1 >>> lr = paddle.optimizer.lr.inverse_time_decay(
sgd_optimizer = fluid.optimizer.SGD( ... learning_rate=base_lr,
learning_rate=paddle.optimizer.lr.inverse_time_decay( ... decay_steps=10000,
learning_rate=base_lr, ... decay_rate=0.5,
decay_steps=10000, ... staircase=True
decay_rate=0.5, ... )
staircase=True))
""" """
with default_main_program()._lr_schedule_guard(): with default_main_program()._lr_schedule_guard():
if in_dygraph_mode(): if in_dygraph_mode():
...@@ -2565,13 +2563,16 @@ def polynomial_decay( ...@@ -2565,13 +2563,16 @@ def polynomial_decay(
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle >>> import paddle
start_lr = 0.01 >>> start_lr = 0.01
total_step = 5000 >>> total_step = 5000
end_lr = 0 >>> end_lr = 0
lr = paddle.optimizer.lr.polynomial_decay( >>> lr = paddle.optimizer.lr.polynomial_decay(
start_lr, total_step, end_lr, power=1) ... start_lr,
... total_step,
... end_lr,
... power=1
... )
""" """
with default_main_program()._lr_schedule_guard(): with default_main_program()._lr_schedule_guard():
if in_dygraph_mode(): if in_dygraph_mode():
...@@ -2611,7 +2612,6 @@ def polynomial_decay( ...@@ -2611,7 +2612,6 @@ def polynomial_decay(
def piecewise_decay(boundaries, values): def piecewise_decay(boundaries, values):
""" """
Applies piecewise decay to the initial learning rate. Applies piecewise decay to the initial learning rate.
The algorithm can be described as the code below. The algorithm can be described as the code below.
...@@ -2626,6 +2626,7 @@ def piecewise_decay(boundaries, values): ...@@ -2626,6 +2626,7 @@ def piecewise_decay(boundaries, values):
learning_rate = 0.5 learning_rate = 0.5
else: else:
learning_rate = 0.1 learning_rate = 0.1
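Assuming the usual ``step < boundary`` semantics of the pseudo-code above, the lookup amounts to counting how many boundaries the current step has already passed. A compact sketch (``piecewise_lr`` is an illustrative helper, not a Paddle API):

.. code-block:: python

    >>> import bisect
    >>> def piecewise_lr(step, boundaries, values):
    ...     # values has exactly one more entry than boundaries
    ...     return values[bisect.bisect_right(boundaries, step)]
    >>> lr_a = piecewise_lr(5000, [10000, 20000], [1.0, 0.5, 0.1])    # 1.0
    >>> lr_b = piecewise_lr(15000, [10000, 20000], [1.0, 0.5, 0.1])   # 0.5
    >>> lr_c = piecewise_lr(25000, [10000, 20000], [1.0, 0.5, 0.1])   # 0.1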
Args: Args:
boundaries: A list of step numbers. boundaries: A list of step numbers.
values: A list of learning rate values that will be picked during values: A list of learning rate values that will be picked during
...@@ -2637,16 +2638,15 @@ def piecewise_decay(boundaries, values): ...@@ -2637,16 +2638,15 @@ def piecewise_decay(boundaries, values):
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle >>> import paddle
paddle.enable_static() >>> paddle.enable_static()
boundaries = [10000, 20000] >>> boundaries = [10000, 20000]
values = [1.0, 0.5, 0.1] >>> values = [1.0, 0.5, 0.1]
optimizer = paddle.optimizer.Momentum( >>> optimizer = paddle.optimizer.Momentum(
momentum=0.9, ... momentum=0.9,
learning_rate=paddle.optimizer.lr.PiecewiseDecay(boundaries, values), ... learning_rate=paddle.optimizer.lr.PiecewiseDecay(boundaries, values),
weight_decay=paddle.regularizer.L2Decay(1e-4)) ... weight_decay=paddle.regularizer.L2Decay(1e-4)
... )
""" """
with default_main_program()._lr_schedule_guard(): with default_main_program()._lr_schedule_guard():
if len(values) - len(boundaries) != 1: if len(values) - len(boundaries) != 1:
...@@ -2714,10 +2714,10 @@ def cosine_decay(learning_rate, step_each_epoch, epochs): ...@@ -2714,10 +2714,10 @@ def cosine_decay(learning_rate, step_each_epoch, epochs):
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle >>> import paddle
base_lr = 0.1 >>> base_lr = 0.1
lr = paddle.optimizer.lr.cosine_decay( >>> lr = paddle.optimizer.lr.cosine_decay(
learning_rate = base_lr, step_each_epoch=10000, epochs=120) ... learning_rate = base_lr, step_each_epoch=10000, epochs=120)
""" """
check_type( check_type(
learning_rate, 'learning_rate', (float, Variable), 'cosine_decay' learning_rate, 'learning_rate', (float, Variable), 'cosine_decay'
...@@ -2771,30 +2771,31 @@ def linear_lr_warmup(learning_rate, warmup_steps, start_lr, end_lr): ...@@ -2771,30 +2771,31 @@ def linear_lr_warmup(learning_rate, warmup_steps, start_lr, end_lr):
Returns: Returns:
Variable: Warm-up learning rate with the same data type as learning_rate. Variable: Warm-up learning rate with the same data type as learning_rate.
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle.fluid as fluid >>> import paddle
>>> paddle.enable_static()
boundaries = [100, 200] >>> boundaries = [100, 200]
lr_steps = [0.1, 0.01, 0.001] >>> lr_steps = [0.1, 0.01, 0.001]
learning_rate = fluid.layers.piecewise_decay(boundaries, lr_steps) #case1, 1D-Tensor >>> learning_rate = paddle.optimizer.lr.piecewise_decay(boundaries, lr_steps) # case1, 1D-Tensor
#learning_rate = 0.1 #case2, single-value >>> # learning_rate = 0.1 # case2, single-value
warmup_steps = 50 >>> warmup_steps = 50
start_lr = 1. / 3. >>> start_lr = 0.1
end_lr = 0.1 >>> end_lr = 1. / 3.
decayed_lr = fluid.layers.linear_lr_warmup(learning_rate, >>> decayed_lr = paddle.optimizer.lr.linear_lr_warmup(
warmup_steps, start_lr, end_lr) ... learning_rate,
... warmup_steps,
place = fluid.CPUPlace() ... start_lr,
exe = fluid.Executor(place) ... end_lr
exe.run(fluid.default_startup_program()) ... )
out, = exe.run(fetch_list=[decayed_lr.name]) >>> place = paddle.CPUPlace()
print(out) >>> exe = paddle.static.Executor(place)
# case1: [0.33333334] >>> exe.run(paddle.static.default_startup_program())
# case2: [0.33333334] >>> out, = exe.run(fetch_list=[decayed_lr.name])
>>> print(out)
[0.1]
""" """
dtype = 'float32' dtype = 'float32'
if isinstance(learning_rate, Variable): if isinstance(learning_rate, Variable):
......