Unverified commit e79133e8, authored by Candy2Tang, committed by GitHub

[xdoctest][task 109] Reformat example code with google style in python/paddle/optimizer/lr.py (#56225)

* [xdoctest][task 109] test=docs_preview

* fix whitespace test=docs_preview

* Apply suggestions from code review

* fix indent and legacy fluid apis

---------
Co-authored-by: Nyakku Shigure <sigure.qaq@gmail.com>
Parent b39f5f62
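For context, the reformat replaces the plain ``code-block:: python`` examples with xdoctest (Google-style) examples: statements are prefixed with ``>>>``, continuation lines and block bodies with ``...``, and expected output follows unprefixed. A minimal sketch of the pattern, using a made-up function rather than anything from lr.py:

    def cube(x):
        """Return the cube of ``x``.

        Examples:
            .. code-block:: python

                >>> # statements use '>>>'; loop bodies and continuations use '...'
                >>> total = 0
                >>> for i in range(3):
                ...     total += cube(i)
                >>> print(total)
                9
        """
        return x ** 3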
......@@ -72,31 +72,31 @@ class LRScheduler:
.. code-block:: python
import paddle
from paddle.optimizer.lr import LRScheduler
class StepDecay(LRScheduler):
def __init__(self,
learning_rate,
step_size,
gamma=0.1,
last_epoch=-1,
verbose=False):
if not isinstance(step_size, int):
raise TypeError(
"The type of 'step_size' must be 'int', but received %s." %
type(step_size))
if gamma >= 1.0:
raise ValueError('gamma should be < 1.0.')
self.step_size = step_size
self.gamma = gamma
super().__init__(learning_rate, last_epoch, verbose)
def get_lr(self):
i = self.last_epoch // self.step_size
return self.base_lr * (self.gamma**i)
>>> import paddle
>>> from paddle.optimizer.lr import LRScheduler
>>> class StepDecay(LRScheduler):
... def __init__(self,
... learning_rate,
... step_size,
... gamma=0.1,
... last_epoch=-1,
... verbose=False):
... if not isinstance(step_size, int):
... raise TypeError(
... "The type of 'step_size' must be 'int', but received %s." %
... type(step_size))
... if gamma >= 1.0:
... raise ValueError('gamma should be < 1.0.')
...
... self.step_size = step_size
... self.gamma = gamma
... super().__init__(learning_rate, last_epoch, verbose)
...
... def get_lr(self):
... i = self.last_epoch // self.step_size
... return self.base_lr * (self.gamma**i)
...
"""
def __init__(self, learning_rate=0.1, last_epoch=-1, verbose=False):
......@@ -136,29 +136,29 @@ class LRScheduler:
Examples:
.. code-block:: python
import paddle
value = paddle.arange(26, dtype='float32')
a = paddle.reshape(value, [2, 13])
linear = paddle.nn.Linear(13, 5)
adadelta = paddle.optimizer.Adadelta(learning_rate=0.0003, epsilon=1e-06, rho=0.95,
parameters = linear.parameters())
out = linear(a)
out.backward()
adadelta.step()
adadelta.clear_grad()
>>> import paddle
>>> value = paddle.arange(26, dtype='float32')
>>> a = paddle.reshape(value, [2, 13])
>>> linear = paddle.nn.Linear(13, 5)
>>> adadelta = paddle.optimizer.Adadelta(learning_rate=0.0003, epsilon=1e-06, rho=0.95,
... parameters = linear.parameters())
>>> out = linear(a)
>>> out.backward()
>>> adadelta.step()
>>> adadelta.clear_grad()
Examples:
.. code-block:: python
import paddle
value = paddle.arange(26, dtype='float32')
a = paddle.reshape(value, [2, 13])
linear = paddle.nn.Linear(13, 5)
adadelta = paddle.optimizer.Adadelta(learning_rate=0.0003, epsilon=1e-06, rho=0.95,
parameters = linear.parameters())
out = linear(a)
out.backward()
adadelta.step()
adadelta.clear_grad()
>>> import paddle
>>> value = paddle.arange(26, dtype='float32')
>>> a = paddle.reshape(value, [2, 13])
>>> linear = paddle.nn.Linear(13, 5)
>>> adadelta = paddle.optimizer.Adadelta(learning_rate=0.0003, epsilon=1e-06, rho=0.95,
... parameters = linear.parameters())
>>> out = linear(a)
>>> out.backward()
>>> adadelta.step()
>>> adadelta.clear_grad()
"""
if epoch is None:
self.last_epoch += 1
......@@ -275,57 +275,57 @@ class NoamDecay(LRScheduler):
.. code-block:: python
:name: code-example1
# Example1: train on default dynamic graph mode
import paddle
import numpy as np
# train on default dynamic graph mode
linear = paddle.nn.Linear(10, 10)
scheduler = paddle.optimizer.lr.NoamDecay(d_model=0.01, warmup_steps=100, verbose=True)
sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
for epoch in range(20):
for batch_id in range(5):
x = paddle.uniform([10, 10])
out = linear(x)
loss = paddle.mean(out)
loss.backward()
sgd.step()
sgd.clear_gradients()
scheduler.step() # If you update learning rate each step
# scheduler.step() # If you update learning rate each epoch
>>> # Example1: train on default dynamic graph mode
>>> import paddle
>>> import numpy as np
>>> # train on default dynamic graph mode
>>> linear = paddle.nn.Linear(10, 10)
>>> scheduler = paddle.optimizer.lr.NoamDecay(d_model=0.01, warmup_steps=100, verbose=True)
>>> sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
>>> for epoch in range(20):
... for batch_id in range(5):
... x = paddle.uniform([10, 10])
... out = linear(x)
... loss = paddle.mean(out)
... loss.backward()
... sgd.step()
... sgd.clear_gradients()
... scheduler.step() # If you update learning rate each step
... # scheduler.step() # If you update learning rate each epoch
.. code-block:: python
:name: code-example2
# Example2: train on static graph mode
import paddle
import numpy as np
paddle.enable_static()
main_prog = paddle.static.Program()
start_prog = paddle.static.Program()
with paddle.static.program_guard(main_prog, start_prog):
x = paddle.static.data(name='x', shape=[None, 4, 5])
y = paddle.static.data(name='y', shape=[None, 4, 5])
z = paddle.static.nn.fc(x, 100)
loss = paddle.mean(z)
scheduler = paddle.optimizer.lr.NoamDecay(d_model=0.01, warmup_steps=100, verbose=True)
sgd = paddle.optimizer.SGD(learning_rate=scheduler)
sgd.minimize(loss)
exe = paddle.static.Executor()
exe.run(start_prog)
for epoch in range(20):
for batch_id in range(5):
out = exe.run(
main_prog,
feed={
'x': np.random.randn(3, 4, 5).astype('float32'),
'y': np.random.randn(3, 4, 5).astype('float32')
},
fetch_list=loss.name)
scheduler.step() # If you update learning rate each step
# scheduler.step() # If you update learning rate each epoch
>>> # Example2: train on static graph mode
>>> import paddle
>>> import numpy as np
>>> paddle.enable_static()
>>> main_prog = paddle.static.Program()
>>> start_prog = paddle.static.Program()
>>> with paddle.static.program_guard(main_prog, start_prog):
... x = paddle.static.data(name='x', shape=[None, 4, 5])
... y = paddle.static.data(name='y', shape=[None, 4, 5])
... z = paddle.static.nn.fc(x, 100)
... loss = paddle.mean(z)
... scheduler = paddle.optimizer.lr.NoamDecay(d_model=0.01, warmup_steps=100, verbose=True)
... sgd = paddle.optimizer.SGD(learning_rate=scheduler)
... sgd.minimize(loss)
...
>>> exe = paddle.static.Executor()
>>> exe.run(start_prog)
>>> for epoch in range(20):
... for batch_id in range(5):
... out = exe.run(
... main_prog,
... feed={
... 'x': np.random.randn(3, 4, 5).astype('float32'),
... 'y': np.random.randn(3, 4, 5).astype('float32')
... },
... fetch_list=loss.name)
... scheduler.step() # If you update learning rate each step
... # scheduler.step() # If you update learning rate each epoch
...
"""
def __init__(
......@@ -385,56 +385,56 @@ class PiecewiseDecay(LRScheduler):
.. code-block:: python
:name: code-example1
# Example1: train on default dynamic graph mode
import paddle
import numpy as np
# train on default dynamic graph mode
linear = paddle.nn.Linear(10, 10)
scheduler = paddle.optimizer.lr.PiecewiseDecay(boundaries=[3, 6, 9], values=[0.1, 0.2, 0.3, 0.4], verbose=True)
sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
for epoch in range(20):
for batch_id in range(5):
x = paddle.uniform([10, 10])
out = linear(x)
loss = paddle.mean(out)
loss.backward()
sgd.step()
sgd.clear_gradients()
scheduler.step() # If you update learning rate each step
# scheduler.step() # If you update learning rate each epoch
>>> # Example1: train on default dynamic graph mode
>>> import paddle
>>> import numpy as np
>>> # train on default dynamic graph mode
>>> linear = paddle.nn.Linear(10, 10)
>>> scheduler = paddle.optimizer.lr.PiecewiseDecay(boundaries=[3, 6, 9], values=[0.1, 0.2, 0.3, 0.4], verbose=True)
>>> sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
>>> for epoch in range(20):
... for batch_id in range(5):
... x = paddle.uniform([10, 10])
... out = linear(x)
... loss = paddle.mean(out)
... loss.backward()
... sgd.step()
... sgd.clear_gradients()
... scheduler.step() # If you update learning rate each step
... # scheduler.step() # If you update learning rate each epoch
.. code-block:: python
:name: code-example2
# Example2: train on static graph mode
import paddle
import numpy as np
paddle.enable_static()
main_prog = paddle.static.Program()
start_prog = paddle.static.Program()
with paddle.static.program_guard(main_prog, start_prog):
x = paddle.static.data(name='x', shape=[None, 4, 5])
y = paddle.static.data(name='y', shape=[None, 4, 5])
z = paddle.static.nn.fc(x, 100)
loss = paddle.mean(z)
scheduler = paddle.optimizer.lr.PiecewiseDecay(boundaries=[3, 6, 9], values=[0.1, 0.2, 0.3, 0.4], verbose=True)
sgd = paddle.optimizer.SGD(learning_rate=scheduler)
sgd.minimize(loss)
exe = paddle.static.Executor()
exe.run(start_prog)
for epoch in range(20):
for batch_id in range(5):
out = exe.run(
main_prog,
feed={
'x': np.random.randn(3, 4, 5).astype('float32'),
'y': np.random.randn(3, 4, 5).astype('float32')
},
fetch_list=loss.name)
scheduler.step() # If you update learning rate each step
# scheduler.step() # If you update learning rate each epoch
>>> # Example2: train on static graph mode
>>> import paddle
>>> import numpy as np
>>> paddle.enable_static()
>>> main_prog = paddle.static.Program()
>>> start_prog = paddle.static.Program()
>>> with paddle.static.program_guard(main_prog, start_prog):
... x = paddle.static.data(name='x', shape=[None, 4, 5])
... y = paddle.static.data(name='y', shape=[None, 4, 5])
... z = paddle.static.nn.fc(x, 100)
... loss = paddle.mean(z)
... scheduler = paddle.optimizer.lr.PiecewiseDecay(boundaries=[3, 6, 9], values=[0.1, 0.2, 0.3, 0.4], verbose=True)
... sgd = paddle.optimizer.SGD(learning_rate=scheduler)
... sgd.minimize(loss)
...
>>> exe = paddle.static.Executor()
>>> exe.run(start_prog)
>>> for epoch in range(20):
... for batch_id in range(5):
... out = exe.run(
... main_prog,
... feed={
... 'x': np.random.randn(3, 4, 5).astype('float32'),
... 'y': np.random.randn(3, 4, 5).astype('float32')
... },
... fetch_list=loss.name)
... scheduler.step() # If you update learning rate each step
... # scheduler.step() # If you update learning rate each epoch
"""
def __init__(self, boundaries, values, last_epoch=-1, verbose=False):
......@@ -482,54 +482,54 @@ class NaturalExpDecay(LRScheduler):
.. code-block:: python
:name: code-example1
# Example1: train on default dynamic graph mode
import paddle
import numpy as np
linear = paddle.nn.Linear(10, 10)
scheduler = paddle.optimizer.lr.NaturalExpDecay(learning_rate=0.5, gamma=0.1, verbose=True)
sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
for epoch in range(20):
for batch_id in range(5):
x = paddle.uniform([10, 10])
out = linear(x)
loss = paddle.mean(out)
loss.backward()
sgd.step()
sgd.clear_gradients()
scheduler.step() # If you update learning rate each step
# scheduler.step() # If you update learning rate each epoch
>>> # Example1: train on default dynamic graph mode
>>> import paddle
>>> import numpy as np
>>> linear = paddle.nn.Linear(10, 10)
>>> scheduler = paddle.optimizer.lr.NaturalExpDecay(learning_rate=0.5, gamma=0.1, verbose=True)
>>> sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
>>> for epoch in range(20):
... for batch_id in range(5):
... x = paddle.uniform([10, 10])
... out = linear(x)
... loss = paddle.mean(out)
... loss.backward()
... sgd.step()
... sgd.clear_gradients()
... scheduler.step() # If you update learning rate each step
... # scheduler.step() # If you update learning rate each epoch
.. code-block:: python
:name: code-example2
# Example2: train on static graph mode
import paddle
import numpy as np
paddle.enable_static()
main_prog = paddle.static.Program()
start_prog = paddle.static.Program()
with paddle.static.program_guard(main_prog, start_prog):
x = paddle.static.data(name='x', shape=[None, 4, 5])
y = paddle.static.data(name='y', shape=[None, 4, 5])
z = paddle.static.nn.fc(x, 100)
loss = paddle.mean(z)
scheduler = paddle.optimizer.lr.NaturalExpDecay(learning_rate=0.5, gamma=0.1, verbose=True)
sgd = paddle.optimizer.SGD(learning_rate=scheduler)
sgd.minimize(loss)
exe = paddle.static.Executor()
exe.run(start_prog)
for epoch in range(20):
for batch_id in range(5):
out = exe.run(
main_prog,
feed={
'x': np.random.randn(3, 4, 5).astype('float32'),
'y': np.random.randn(3, 4, 5).astype('float32')
},
fetch_list=loss.name)
scheduler.step() # If you update learning rate each step
# scheduler.step() # If you update learning rate each epoch
>>> # Example2: train on static graph mode
>>> import paddle
>>> import numpy as np
>>> paddle.enable_static()
>>> main_prog = paddle.static.Program()
>>> start_prog = paddle.static.Program()
>>> with paddle.static.program_guard(main_prog, start_prog):
... x = paddle.static.data(name='x', shape=[None, 4, 5])
... y = paddle.static.data(name='y', shape=[None, 4, 5])
... z = paddle.static.nn.fc(x, 100)
... loss = paddle.mean(z)
... scheduler = paddle.optimizer.lr.NaturalExpDecay(learning_rate=0.5, gamma=0.1, verbose=True)
... sgd = paddle.optimizer.SGD(learning_rate=scheduler)
... sgd.minimize(loss)
...
>>> exe = paddle.static.Executor()
>>> exe.run(start_prog)
>>> for epoch in range(20):
... for batch_id in range(5):
... out = exe.run(
... main_prog,
... feed={
... 'x': np.random.randn(3, 4, 5).astype('float32'),
... 'y': np.random.randn(3, 4, 5).astype('float32')
... },
... fetch_list=loss.name)
... scheduler.step() # If you update learning rate each step
... # scheduler.step() # If you update learning rate each epoch
"""
def __init__(self, learning_rate, gamma, last_epoch=-1, verbose=False):
......@@ -569,57 +569,57 @@ class InverseTimeDecay(LRScheduler):
.. code-block:: python
:name: code-example1
# Example1: train on default dynamic graph mode
import paddle
import numpy as np
# train on default dynamic graph mode
linear = paddle.nn.Linear(10, 10)
scheduler = paddle.optimizer.lr.InverseTimeDecay(learning_rate=0.5, gamma=0.1, verbose=True)
sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
for epoch in range(20):
for batch_id in range(5):
x = paddle.uniform([10, 10])
out = linear(x)
loss = paddle.mean(out)
loss.backward()
sgd.step()
sgd.clear_gradients()
scheduler.step() # If you update learning rate each step
# scheduler.step() # If you update learning rate each epoch
>>> # Example1: train on default dynamic graph mode
>>> import paddle
>>> import numpy as np
>>> # train on default dynamic graph mode
>>> linear = paddle.nn.Linear(10, 10)
>>> scheduler = paddle.optimizer.lr.InverseTimeDecay(learning_rate=0.5, gamma=0.1, verbose=True)
>>> sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
>>> for epoch in range(20):
... for batch_id in range(5):
... x = paddle.uniform([10, 10])
... out = linear(x)
... loss = paddle.mean(out)
... loss.backward()
... sgd.step()
... sgd.clear_gradients()
... scheduler.step() # If you update learning rate each step
... # scheduler.step() # If you update learning rate each epoch
.. code-block:: python
:name: code-example2
# Example2: train on static graph mode
import paddle
import numpy as np
paddle.enable_static()
main_prog = paddle.static.Program()
start_prog = paddle.static.Program()
with paddle.static.program_guard(main_prog, start_prog):
x = paddle.static.data(name='x', shape=[None, 4, 5])
y = paddle.static.data(name='y', shape=[None, 4, 5])
z = paddle.static.nn.fc(x, 100)
loss = paddle.mean(z)
scheduler = paddle.optimizer.lr.InverseTimeDecay(learning_rate=0.5, gamma=0.1, verbose=True)
sgd = paddle.optimizer.SGD(learning_rate=scheduler)
sgd.minimize(loss)
exe = paddle.static.Executor()
exe.run(start_prog)
for epoch in range(20):
for batch_id in range(5):
out = exe.run(
main_prog,
feed={
'x': np.random.randn(3, 4, 5).astype('float32'),
'y': np.random.randn(3, 4, 5).astype('float32')
},
fetch_list=loss.name)
scheduler.step() # If you update learning rate each step
# scheduler.step() # If you update learning rate each epoch
>>> # Example2: train on static graph mode
>>> import paddle
>>> import numpy as np
>>> paddle.enable_static()
>>> main_prog = paddle.static.Program()
>>> start_prog = paddle.static.Program()
>>> with paddle.static.program_guard(main_prog, start_prog):
... x = paddle.static.data(name='x', shape=[None, 4, 5])
... y = paddle.static.data(name='y', shape=[None, 4, 5])
... z = paddle.static.nn.fc(x, 100)
... loss = paddle.mean(z)
... scheduler = paddle.optimizer.lr.InverseTimeDecay(learning_rate=0.5, gamma=0.1, verbose=True)
... sgd = paddle.optimizer.SGD(learning_rate=scheduler)
... sgd.minimize(loss)
...
>>> exe = paddle.static.Executor()
>>> exe.run(start_prog)
>>> for epoch in range(20):
... for batch_id in range(5):
... out = exe.run(
... main_prog,
... feed={
... 'x': np.random.randn(3, 4, 5).astype('float32'),
... 'y': np.random.randn(3, 4, 5).astype('float32')
... },
... fetch_list=loss.name)
... scheduler.step() # If you update learning rate each step
... # scheduler.step() # If you update learning rate each epoch
...
"""
def __init__(self, learning_rate, gamma, last_epoch=-1, verbose=False):
......@@ -672,56 +672,56 @@ class PolynomialDecay(LRScheduler):
.. code-block:: python
:name: code-example1
# Example1: train on default dynamic graph mode
import paddle
import numpy as np
# train on default dynamic graph mode
linear = paddle.nn.Linear(10, 10)
scheduler = paddle.optimizer.lr.PolynomialDecay(learning_rate=0.5, decay_steps=20, verbose=True)
sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
for epoch in range(20):
for batch_id in range(5):
x = paddle.uniform([10, 10])
out = linear(x)
loss = paddle.mean(out)
loss.backward()
sgd.step()
sgd.clear_gradients()
scheduler.step() # If you update learning rate each step
# scheduler.step() # If you update learning rate each epoch
>>> # Example1: train on default dynamic graph mode
>>> import paddle
>>> import numpy as np
>>> # train on default dynamic graph mode
>>> linear = paddle.nn.Linear(10, 10)
>>> scheduler = paddle.optimizer.lr.PolynomialDecay(learning_rate=0.5, decay_steps=20, verbose=True)
>>> sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
>>> for epoch in range(20):
... for batch_id in range(5):
... x = paddle.uniform([10, 10])
... out = linear(x)
... loss = paddle.mean(out)
... loss.backward()
... sgd.step()
... sgd.clear_gradients()
... scheduler.step() # If you update learning rate each step
... # scheduler.step() # If you update learning rate each epoch
.. code-block:: python
:name: code-example2
# Example2: train on static graph mode
import paddle
import numpy as np
paddle.enable_static()
main_prog = paddle.static.Program()
start_prog = paddle.static.Program()
with paddle.static.program_guard(main_prog, start_prog):
x = paddle.static.data(name='x', shape=[None, 4, 5])
y = paddle.static.data(name='y', shape=[None, 4, 5])
z = paddle.static.nn.fc(x, 100)
loss = paddle.mean(z)
scheduler = paddle.optimizer.lr.PolynomialDecay(learning_rate=0.5, decay_steps=20, verbose=True)
sgd = paddle.optimizer.SGD(learning_rate=scheduler)
sgd.minimize(loss)
exe = paddle.static.Executor()
exe.run(start_prog)
for epoch in range(20):
for batch_id in range(5):
out = exe.run(
main_prog,
feed={
'x': np.random.randn(3, 4, 5).astype('float32'),
'y': np.random.randn(3, 4, 5).astype('float32')
},
fetch_list=loss.name)
scheduler.step() # If you update learning rate each step
# scheduler.step() # If you update learning rate each epoch
>>> # Example2: train on static graph mode
>>> import paddle
>>> import numpy as np
>>> paddle.enable_static()
>>> main_prog = paddle.static.Program()
>>> start_prog = paddle.static.Program()
>>> with paddle.static.program_guard(main_prog, start_prog):
... x = paddle.static.data(name='x', shape=[None, 4, 5])
... y = paddle.static.data(name='y', shape=[None, 4, 5])
... z = paddle.static.nn.fc(x, 100)
... loss = paddle.mean(z)
... scheduler = paddle.optimizer.lr.PolynomialDecay(learning_rate=0.5, decay_steps=20, verbose=True)
... sgd = paddle.optimizer.SGD(learning_rate=scheduler)
... sgd.minimize(loss)
...
>>> exe = paddle.static.Executor()
>>> exe.run(start_prog)
>>> for epoch in range(20):
... for batch_id in range(5):
... out = exe.run(
... main_prog,
... feed={
... 'x': np.random.randn(3, 4, 5).astype('float32'),
... 'y': np.random.randn(3, 4, 5).astype('float32')
... },
... fetch_list=loss.name)
... scheduler.step() # If you update learning rate each step
... # scheduler.step() # If you update learning rate each epoch
"""
def __init__(
......@@ -803,58 +803,58 @@ class LinearWarmup(LRScheduler):
.. code-block:: python
:name: code-example1
# Example1: train on default dynamic graph mode
import paddle
import numpy as np
# train on default dynamic graph mode
linear = paddle.nn.Linear(10, 10)
scheduler = paddle.optimizer.lr.LinearWarmup(
learning_rate=0.5, warmup_steps=20, start_lr=0, end_lr=0.5, verbose=True)
sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
for epoch in range(20):
for batch_id in range(5):
x = paddle.uniform([10, 10])
out = linear(x)
loss = paddle.mean(out)
loss.backward()
sgd.step()
sgd.clear_gradients()
scheduler.step() # If you update learning rate each step
# scheduler.step() # If you update learning rate each epoch
>>> # Example1: train on default dynamic graph mode
>>> import paddle
>>> import numpy as np
>>> # train on default dynamic graph mode
>>> linear = paddle.nn.Linear(10, 10)
>>> scheduler = paddle.optimizer.lr.LinearWarmup(
... learning_rate=0.5, warmup_steps=20, start_lr=0, end_lr=0.5, verbose=True)
>>> sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
>>> for epoch in range(20):
... for batch_id in range(5):
... x = paddle.uniform([10, 10])
... out = linear(x)
... loss = paddle.mean(out)
... loss.backward()
... sgd.step()
... sgd.clear_gradients()
... scheduler.step() # If you update learning rate each step
... # scheduler.step() # If you update learning rate each epoch
.. code-block:: python
:name: code-example2
# Example2: train on static graph mode
import paddle
import numpy as np
paddle.enable_static()
main_prog = paddle.static.Program()
start_prog = paddle.static.Program()
with paddle.static.program_guard(main_prog, start_prog):
x = paddle.static.data(name='x', shape=[None, 4, 5])
y = paddle.static.data(name='y', shape=[None, 4, 5])
z = paddle.static.nn.fc(x, 100)
loss = paddle.mean(z)
scheduler = paddle.optimizer.lr.LinearWarmup(
learning_rate=0.5, warmup_steps=20, start_lr=0, end_lr=0.5, verbose=True)
sgd = paddle.optimizer.SGD(learning_rate=scheduler)
sgd.minimize(loss)
exe = paddle.static.Executor()
exe.run(start_prog)
for epoch in range(20):
for batch_id in range(5):
out = exe.run(
main_prog,
feed={
'x': np.random.randn(3, 4, 5).astype('float32'),
'y': np.random.randn(3, 4, 5).astype('float32')
},
fetch_list=loss.name)
scheduler.step() # If you update learning rate each step
# scheduler.step() # If you update learning rate each epoch
>>> # Example2: train on static graph mode
>>> import paddle
>>> import numpy as np
>>> paddle.enable_static()
>>> main_prog = paddle.static.Program()
>>> start_prog = paddle.static.Program()
>>> with paddle.static.program_guard(main_prog, start_prog):
... x = paddle.static.data(name='x', shape=[None, 4, 5])
... y = paddle.static.data(name='y', shape=[None, 4, 5])
... z = paddle.static.nn.fc(x, 100)
... loss = paddle.mean(z)
... scheduler = paddle.optimizer.lr.LinearWarmup(
... learning_rate=0.5, warmup_steps=20, start_lr=0, end_lr=0.5, verbose=True)
... sgd = paddle.optimizer.SGD(learning_rate=scheduler)
... sgd.minimize(loss)
...
>>> exe = paddle.static.Executor()
>>> exe.run(start_prog)
>>> for epoch in range(20):
... for batch_id in range(5):
... out = exe.run(
... main_prog,
... feed={
... 'x': np.random.randn(3, 4, 5).astype('float32'),
... 'y': np.random.randn(3, 4, 5).astype('float32')
... },
... fetch_list=loss.name)
... scheduler.step() # If you update learning rate each step
... # scheduler.step() # If you update learning rate each epoch
"""
def __init__(
......@@ -943,56 +943,56 @@ class ExponentialDecay(LRScheduler):
.. code-block:: python
:name: code-example1
# Example1: train on default dynamic graph mode
import paddle
import numpy as np
# train on default dynamic graph mode
linear = paddle.nn.Linear(10, 10)
scheduler = paddle.optimizer.lr.ExponentialDecay(learning_rate=0.5, gamma=0.9, verbose=True)
sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
for epoch in range(20):
for batch_id in range(5):
x = paddle.uniform([10, 10])
out = linear(x)
loss = paddle.mean(out)
loss.backward()
sgd.step()
sgd.clear_gradients()
scheduler.step() # If you update learning rate each step
# scheduler.step() # If you update learning rate each epoch
>>> # Example1: train on default dynamic graph mode
>>> import paddle
>>> import numpy as np
>>> # train on default dynamic graph mode
>>> linear = paddle.nn.Linear(10, 10)
>>> scheduler = paddle.optimizer.lr.ExponentialDecay(learning_rate=0.5, gamma=0.9, verbose=True)
>>> sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
>>> for epoch in range(20):
... for batch_id in range(5):
... x = paddle.uniform([10, 10])
... out = linear(x)
... loss = paddle.mean(out)
... loss.backward()
... sgd.step()
... sgd.clear_gradients()
... scheduler.step() # If you update learning rate each step
... # scheduler.step() # If you update learning rate each epoch
.. code-block:: python
:name: code-example2
# Example2: train on static graph mode
import paddle
import numpy as np
paddle.enable_static()
main_prog = paddle.static.Program()
start_prog = paddle.static.Program()
with paddle.static.program_guard(main_prog, start_prog):
x = paddle.static.data(name='x', shape=[None, 4, 5])
y = paddle.static.data(name='y', shape=[None, 4, 5])
z = paddle.static.nn.fc(x, 100)
loss = paddle.mean(z)
scheduler = paddle.optimizer.lr.ExponentialDecay(learning_rate=0.5, gamma=0.9, verbose=True)
sgd = paddle.optimizer.SGD(learning_rate=scheduler)
sgd.minimize(loss)
exe = paddle.static.Executor()
exe.run(start_prog)
for epoch in range(20):
for batch_id in range(5):
out = exe.run(
main_prog,
feed={
'x': np.random.randn(3, 4, 5).astype('float32'),
'y': np.random.randn(3, 4, 5).astype('float32')
},
fetch_list=loss.name)
scheduler.step() # If you update learning rate each step
# scheduler.step() # If you update learning rate each epoch
>>> # Example2: train on static graph mode
>>> import paddle
>>> import numpy as np
>>> paddle.enable_static()
>>> main_prog = paddle.static.Program()
>>> start_prog = paddle.static.Program()
>>> with paddle.static.program_guard(main_prog, start_prog):
... x = paddle.static.data(name='x', shape=[None, 4, 5])
... y = paddle.static.data(name='y', shape=[None, 4, 5])
... z = paddle.static.nn.fc(x, 100)
... loss = paddle.mean(z)
... scheduler = paddle.optimizer.lr.ExponentialDecay(learning_rate=0.5, gamma=0.9, verbose=True)
... sgd = paddle.optimizer.SGD(learning_rate=scheduler)
... sgd.minimize(loss)
...
>>> exe = paddle.static.Executor()
>>> exe.run(start_prog)
>>> for epoch in range(20):
... for batch_id in range(5):
... out = exe.run(
... main_prog,
... feed={
... 'x': np.random.randn(3, 4, 5).astype('float32'),
... 'y': np.random.randn(3, 4, 5).astype('float32')
... },
... fetch_list=loss.name)
... scheduler.step() # If you update learning rate each step
... # scheduler.step() # If you update learning rate each epoch
"""
def __init__(self, learning_rate, gamma, last_epoch=-1, verbose=False):
......@@ -1041,56 +1041,56 @@ class MultiStepDecay(LRScheduler):
.. code-block:: python
:name: code-example1
# Example1: train on default dynamic graph mode
import paddle
import numpy as np
# train on default dynamic graph mode
linear = paddle.nn.Linear(10, 10)
scheduler = paddle.optimizer.lr.MultiStepDecay(learning_rate=0.5, milestones=[2, 4, 6], gamma=0.8, verbose=True)
sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
for epoch in range(20):
for batch_id in range(5):
x = paddle.uniform([10, 10])
out = linear(x)
loss = paddle.mean(out)
loss.backward()
sgd.step()
sgd.clear_gradients()
scheduler.step() # If you update learning rate each step
# scheduler.step() # If you update learning rate each epoch
>>> # Example1: train on default dynamic graph mode
>>> import paddle
>>> import numpy as np
>>> # train on default dynamic graph mode
>>> linear = paddle.nn.Linear(10, 10)
>>> scheduler = paddle.optimizer.lr.MultiStepDecay(learning_rate=0.5, milestones=[2, 4, 6], gamma=0.8, verbose=True)
>>> sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
>>> for epoch in range(20):
... for batch_id in range(5):
... x = paddle.uniform([10, 10])
... out = linear(x)
... loss = paddle.mean(out)
... loss.backward()
... sgd.step()
... sgd.clear_gradients()
... scheduler.step() # If you update learning rate each step
... # scheduler.step() # If you update learning rate each epoch
.. code-block:: python
:name: code-example2
# Example2: train on static graph mode
import paddle
import numpy as np
paddle.enable_static()
main_prog = paddle.static.Program()
start_prog = paddle.static.Program()
with paddle.static.program_guard(main_prog, start_prog):
x = paddle.static.data(name='x', shape=[None, 4, 5])
y = paddle.static.data(name='y', shape=[None, 4, 5])
z = paddle.static.nn.fc(x, 100)
loss = paddle.mean(z)
scheduler = paddle.optimizer.lr.MultiStepDecay(learning_rate=0.5, milestones=[2, 4, 6], gamma=0.8, verbose=True)
sgd = paddle.optimizer.SGD(learning_rate=scheduler)
sgd.minimize(loss)
exe = paddle.static.Executor()
exe.run(start_prog)
for epoch in range(20):
for batch_id in range(5):
out = exe.run(
main_prog,
feed={
'x': np.random.randn(3, 4, 5).astype('float32'),
'y': np.random.randn(3, 4, 5).astype('float32')
},
fetch_list=loss.name)
scheduler.step() # If you update learning rate each step
# scheduler.step() # If you update learning rate each epoch
>>> # Example2: train on static graph mode
>>> import paddle
>>> import numpy as np
>>> paddle.enable_static()
>>> main_prog = paddle.static.Program()
>>> start_prog = paddle.static.Program()
>>> with paddle.static.program_guard(main_prog, start_prog):
... x = paddle.static.data(name='x', shape=[None, 4, 5])
... y = paddle.static.data(name='y', shape=[None, 4, 5])
... z = paddle.static.nn.fc(x, 100)
... loss = paddle.mean(z)
... scheduler = paddle.optimizer.lr.MultiStepDecay(learning_rate=0.5, milestones=[2, 4, 6], gamma=0.8, verbose=True)
... sgd = paddle.optimizer.SGD(learning_rate=scheduler)
... sgd.minimize(loss)
...
>>> exe = paddle.static.Executor()
>>> exe.run(start_prog)
>>> for epoch in range(20):
... for batch_id in range(5):
... out = exe.run(
... main_prog,
... feed={
... 'x': np.random.randn(3, 4, 5).astype('float32'),
... 'y': np.random.randn(3, 4, 5).astype('float32')
... },
... fetch_list=loss.name)
... scheduler.step() # If you update learning rate each step
... # scheduler.step() # If you update learning rate each epoch
"""
def __init__(
......@@ -1155,56 +1155,56 @@ class StepDecay(LRScheduler):
.. code-block:: python
:name: code-example1
# Example1: train on default dynamic graph mode
import paddle
import numpy as np
# train on default dynamic graph mode
linear = paddle.nn.Linear(10, 10)
scheduler = paddle.optimizer.lr.StepDecay(learning_rate=0.5, step_size=5, gamma=0.8, verbose=True)
sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
for epoch in range(20):
for batch_id in range(5):
x = paddle.uniform([10, 10])
out = linear(x)
loss = paddle.mean(out)
loss.backward()
sgd.step()
sgd.clear_gradients()
scheduler.step() # If you update learning rate each step
# scheduler.step() # If you update learning rate each epoch
>>> # Example1: train on default dynamic graph mode
>>> import paddle
>>> import numpy as np
>>> # train on default dynamic graph mode
>>> linear = paddle.nn.Linear(10, 10)
>>> scheduler = paddle.optimizer.lr.StepDecay(learning_rate=0.5, step_size=5, gamma=0.8, verbose=True)
>>> sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
>>> for epoch in range(20):
... for batch_id in range(5):
... x = paddle.uniform([10, 10])
... out = linear(x)
... loss = paddle.mean(out)
... loss.backward()
... sgd.step()
... sgd.clear_gradients()
... scheduler.step() # If you update learning rate each step
... # scheduler.step() # If you update learning rate each epoch
.. code-block:: python
:name: code-example2
# Example2: train on static graph mode
import paddle
import numpy as np
paddle.enable_static()
main_prog = paddle.static.Program()
start_prog = paddle.static.Program()
with paddle.static.program_guard(main_prog, start_prog):
x = paddle.static.data(name='x', shape=[None, 4, 5])
y = paddle.static.data(name='y', shape=[None, 4, 5])
z = paddle.static.nn.fc(x, 100)
loss = paddle.mean(z)
scheduler = paddle.optimizer.lr.StepDecay(learning_rate=0.5, step_size=5, gamma=0.8, verbose=True)
sgd = paddle.optimizer.SGD(learning_rate=scheduler)
sgd.minimize(loss)
exe = paddle.static.Executor()
exe.run(start_prog)
for epoch in range(20):
for batch_id in range(5):
out = exe.run(
main_prog,
feed={
'x': np.random.randn(3, 4, 5).astype('float32'),
'y': np.random.randn(3, 4, 5).astype('float32')
},
fetch_list=loss.name)
scheduler.step() # If you update learning rate each step
# scheduler.step() # If you update learning rate each epoch
>>> # Example2: train on static graph mode
>>> import paddle
>>> import numpy as np
>>> paddle.enable_static()
>>> main_prog = paddle.static.Program()
>>> start_prog = paddle.static.Program()
>>> with paddle.static.program_guard(main_prog, start_prog):
... x = paddle.static.data(name='x', shape=[None, 4, 5])
... y = paddle.static.data(name='y', shape=[None, 4, 5])
... z = paddle.static.nn.fc(x, 100)
... loss = paddle.mean(z)
... scheduler = paddle.optimizer.lr.StepDecay(learning_rate=0.5, step_size=5, gamma=0.8, verbose=True)
... sgd = paddle.optimizer.SGD(learning_rate=scheduler)
... sgd.minimize(loss)
...
>>> exe = paddle.static.Executor()
>>> exe.run(start_prog)
>>> for epoch in range(20):
... for batch_id in range(5):
... out = exe.run(
... main_prog,
... feed={
... 'x': np.random.randn(3, 4, 5).astype('float32'),
... 'y': np.random.randn(3, 4, 5).astype('float32')
... },
... fetch_list=loss.name)
... scheduler.step() # If you update learning rate each step
... # scheduler.step() # If you update learning rate each epoch
"""
def __init__(
......@@ -1259,57 +1259,57 @@ class LambdaDecay(LRScheduler):
.. code-block:: python
:name: code-example1
# Example1: train on default dynamic graph mode
import paddle
import numpy as np
# train on default dynamic graph mode
linear = paddle.nn.Linear(10, 10)
scheduler = paddle.optimizer.lr.LambdaDecay(learning_rate=0.5, lr_lambda=lambda x:0.95**x, verbose=True)
sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
for epoch in range(20):
for batch_id in range(5):
x = paddle.uniform([10, 10])
out = linear(x)
loss = paddle.mean(out)
loss.backward()
sgd.step()
sgd.clear_gradients()
scheduler.step() # If you update learning rate each step
# scheduler.step() # If you update learning rate each epoch
>>> # Example1: train on default dynamic graph mode
>>> import paddle
>>> import numpy as np
>>> # train on default dynamic graph mode
>>> linear = paddle.nn.Linear(10, 10)
>>> scheduler = paddle.optimizer.lr.LambdaDecay(learning_rate=0.5, lr_lambda=lambda x:0.95**x, verbose=True)
>>> sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
>>> for epoch in range(20):
... for batch_id in range(5):
... x = paddle.uniform([10, 10])
... out = linear(x)
... loss = paddle.mean(out)
... loss.backward()
... sgd.step()
... sgd.clear_gradients()
... scheduler.step() # If you update learning rate each step
... # scheduler.step() # If you update learning rate each epoch
.. code-block:: python
:name: code-example2
# Example2: train on static graph mode
import paddle
import numpy as np
paddle.enable_static()
main_prog = paddle.static.Program()
start_prog = paddle.static.Program()
with paddle.static.program_guard(main_prog, start_prog):
x = paddle.static.data(name='x', shape=[None, 4, 5])
y = paddle.static.data(name='y', shape=[None, 4, 5])
z = paddle.static.nn.fc(x, 100)
loss = paddle.mean(z)
scheduler = paddle.optimizer.lr.LambdaDecay(learning_rate=0.5, lr_lambda=lambda x:0.95**x, verbose=True)
sgd = paddle.optimizer.SGD(learning_rate=scheduler)
sgd.minimize(loss)
exe = paddle.static.Executor()
exe.run(start_prog)
for epoch in range(20):
for batch_id in range(5):
out = exe.run(
main_prog,
feed={
'x': np.random.randn(3, 4, 5).astype('float32'),
'y': np.random.randn(3, 4, 5).astype('float32')
},
fetch_list=loss.name)
scheduler.step() # If you update learning rate each step
# scheduler.step() # If you update learning rate each epoch
>>> # Example2: train on static graph mode
>>> import paddle
>>> import numpy as np
>>> paddle.enable_static()
>>> main_prog = paddle.static.Program()
>>> start_prog = paddle.static.Program()
>>> with paddle.static.program_guard(main_prog, start_prog):
... x = paddle.static.data(name='x', shape=[None, 4, 5])
... y = paddle.static.data(name='y', shape=[None, 4, 5])
... z = paddle.static.nn.fc(x, 100)
... loss = paddle.mean(z)
... scheduler = paddle.optimizer.lr.LambdaDecay(learning_rate=0.5, lr_lambda=lambda x:0.95**x, verbose=True)
... sgd = paddle.optimizer.SGD(learning_rate=scheduler)
... sgd.minimize(loss)
...
>>> exe = paddle.static.Executor()
>>> exe.run(start_prog)
>>> for epoch in range(20):
... for batch_id in range(5):
... out = exe.run(
... main_prog,
... feed={
... 'x': np.random.randn(3, 4, 5).astype('float32'),
... 'y': np.random.randn(3, 4, 5).astype('float32')
... },
... fetch_list=loss.name)
... scheduler.step() # If you update learning rate each step
... # scheduler.step() # If you update learning rate each epoch
...
"""
def __init__(self, learning_rate, lr_lambda, last_epoch=-1, verbose=False):
......@@ -1367,57 +1367,57 @@ class ReduceOnPlateau(LRScheduler):
.. code-block:: python
:name: code-example1
# Example1: train on default dynamic graph mode
import paddle
import numpy as np
# train on default dynamic graph mode
linear = paddle.nn.Linear(10, 10)
scheduler = paddle.optimizer.lr.ReduceOnPlateau(learning_rate=1.0, factor=0.5, patience=5, verbose=True)
sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
for epoch in range(20):
for batch_id in range(5):
x = paddle.uniform([10, 10])
out = linear(x)
loss = paddle.mean(out)
loss.backward()
sgd.step()
sgd.clear_gradients()
scheduler.step(loss) # If you update learning rate each step
# scheduler.step(loss) # If you update learning rate each epoch
>>> # Example1: train on default dynamic graph mode
>>> import paddle
>>> import numpy as np
>>> # train on default dynamic graph mode
>>> linear = paddle.nn.Linear(10, 10)
>>> scheduler = paddle.optimizer.lr.ReduceOnPlateau(learning_rate=1.0, factor=0.5, patience=5, verbose=True)
>>> sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
>>> for epoch in range(20):
... for batch_id in range(5):
... x = paddle.uniform([10, 10])
... out = linear(x)
... loss = paddle.mean(out)
... loss.backward()
... sgd.step()
... sgd.clear_gradients()
... scheduler.step(loss) # If you update learning rate each step
... # scheduler.step(loss) # If you update learning rate each epoch
.. code-block:: python
:name: code-example2
# Example2: train on static graph mode
import paddle
import numpy as np
paddle.enable_static()
main_prog = paddle.static.Program()
start_prog = paddle.static.Program()
with paddle.static.program_guard(main_prog, start_prog):
x = paddle.static.data(name='x', shape=[None, 4, 5])
y = paddle.static.data(name='y', shape=[None, 4, 5])
z = paddle.static.nn.fc(x, 100)
loss = paddle.mean(z)
scheduler = paddle.optimizer.lr.ReduceOnPlateau(learning_rate=1.0, factor=0.5, patience=5, verbose=True)
sgd = paddle.optimizer.SGD(learning_rate=scheduler)
sgd.minimize(loss)
exe = paddle.static.Executor()
exe.run(start_prog)
for epoch in range(20):
for batch_id in range(5):
out = exe.run(
main_prog,
feed={
'x': np.random.randn(3, 4, 5).astype('float32'),
'y': np.random.randn(3, 4, 5).astype('float32')
},
fetch_list=loss.name)
scheduler.step(out[0]) # If you update learning rate each step
# scheduler.step(out[0]) # If you update learning rate each epoch
>>> # Example2: train on static graph mode
>>> import paddle
>>> import numpy as np
>>> paddle.enable_static()
>>> main_prog = paddle.static.Program()
>>> start_prog = paddle.static.Program()
>>> with paddle.static.program_guard(main_prog, start_prog):
... x = paddle.static.data(name='x', shape=[None, 4, 5])
... y = paddle.static.data(name='y', shape=[None, 4, 5])
... z = paddle.static.nn.fc(x, 100)
... loss = paddle.mean(z)
... scheduler = paddle.optimizer.lr.ReduceOnPlateau(learning_rate=1.0, factor=0.5, patience=5, verbose=True)
... sgd = paddle.optimizer.SGD(learning_rate=scheduler)
... sgd.minimize(loss)
...
>>> exe = paddle.static.Executor()
>>> exe.run(start_prog)
>>> for epoch in range(20):
... for batch_id in range(5):
... out = exe.run(
... main_prog,
... feed={
... 'x': np.random.randn(3, 4, 5).astype('float32'),
... 'y': np.random.randn(3, 4, 5).astype('float32')
... },
... fetch_list=loss.name)
... scheduler.step(out[0]) # If you update learning rate each step
... # scheduler.step(out[0]) # If you update learning rate each epoch
...
"""
def __init__(
......@@ -1598,56 +1598,56 @@ class CosineAnnealingDecay(LRScheduler):
.. code-block:: python
:name: code-example1
# Example1: train on default dynamic graph mode
import paddle
import numpy as np
# train on default dynamic graph mode
linear = paddle.nn.Linear(10, 10)
scheduler = paddle.optimizer.lr.CosineAnnealingDecay(learning_rate=0.5, T_max=10, verbose=True)
sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
for epoch in range(20):
for batch_id in range(5):
x = paddle.uniform([10, 10])
out = linear(x)
loss = paddle.mean(out)
loss.backward()
sgd.step()
sgd.clear_gradients()
scheduler.step() # If you update learning rate each step
# scheduler.step() # If you update learning rate each epoch
>>> # Example1: train on default dynamic graph mode
>>> import paddle
>>> import numpy as np
>>> # train on default dynamic graph mode
>>> linear = paddle.nn.Linear(10, 10)
>>> scheduler = paddle.optimizer.lr.CosineAnnealingDecay(learning_rate=0.5, T_max=10, verbose=True)
>>> sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
>>> for epoch in range(20):
... for batch_id in range(5):
... x = paddle.uniform([10, 10])
... out = linear(x)
... loss = paddle.mean(out)
... loss.backward()
... sgd.step()
... sgd.clear_gradients()
... scheduler.step() # If you update learning rate each step
... # scheduler.step() # If you update learning rate each epoch
.. code-block:: python
:name: code-example2
# Example2: train on static graph mode
import paddle
import numpy as np
paddle.enable_static()
main_prog = paddle.static.Program()
start_prog = paddle.static.Program()
with paddle.static.program_guard(main_prog, start_prog):
x = paddle.static.data(name='x', shape=[None, 4, 5])
y = paddle.static.data(name='y', shape=[None, 4, 5])
z = paddle.static.nn.fc(x, 100)
loss = paddle.mean(z)
scheduler = paddle.optimizer.lr.CosineAnnealingDecay(learning_rate=0.5, T_max=10, verbose=True)
sgd = paddle.optimizer.SGD(learning_rate=scheduler)
sgd.minimize(loss)
exe = paddle.static.Executor()
exe.run(start_prog)
for epoch in range(20):
for batch_id in range(5):
out = exe.run(
main_prog,
feed={
'x': np.random.randn(3, 4, 5).astype('float32'),
'y': np.random.randn(3, 4, 5).astype('float32')
},
fetch_list=loss.name)
scheduler.step() # If you update learning rate each step
# scheduler.step() # If you update learning rate each epoch
>>> # Example2: train on static graph mode
>>> import paddle
>>> import numpy as np
>>> paddle.enable_static()
>>> main_prog = paddle.static.Program()
>>> start_prog = paddle.static.Program()
>>> with paddle.static.program_guard(main_prog, start_prog):
... x = paddle.static.data(name='x', shape=[None, 4, 5])
... y = paddle.static.data(name='y', shape=[None, 4, 5])
... z = paddle.static.nn.fc(x, 100)
... loss = paddle.mean(z)
... scheduler = paddle.optimizer.lr.CosineAnnealingDecay(learning_rate=0.5, T_max=10, verbose=True)
... sgd = paddle.optimizer.SGD(learning_rate=scheduler)
... sgd.minimize(loss)
...
>>> exe = paddle.static.Executor()
>>> exe.run(start_prog)
>>> for epoch in range(20):
... for batch_id in range(5):
... out = exe.run(
... main_prog,
... feed={
... 'x': np.random.randn(3, 4, 5).astype('float32'),
... 'y': np.random.randn(3, 4, 5).astype('float32')
... },
... fetch_list=loss.name)
... scheduler.step() # If you update learning rate each step
... # scheduler.step() # If you update learning rate each epoch
"""
def __init__(
......@@ -1722,23 +1722,23 @@ class MultiplicativeDecay(LRScheduler):
.. code-block:: python
import paddle
# train on default dynamic graph mode
linear = paddle.nn.Linear(10, 10)
scheduler = paddle.optimizer.lr.MultiplicativeDecay(learning_rate=0.5, lr_lambda=lambda x:0.95, verbose=True)
sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
for epoch in range(20):
for batch_id in range(5):
x = paddle.uniform([10, 10])
out = linear(x)
loss = paddle.mean(out)
loss.backward()
sgd.step()
sgd.clear_gradients()
scheduler.step() # If you update learning rate each step
# scheduler.step() # If you update learning rate each epoch
>>> import paddle
>>> # train on default dynamic graph mode
>>> linear = paddle.nn.Linear(10, 10)
>>> scheduler = paddle.optimizer.lr.MultiplicativeDecay(learning_rate=0.5, lr_lambda=lambda x:0.95, verbose=True)
>>> sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
>>> for epoch in range(20):
... for batch_id in range(5):
... x = paddle.uniform([10, 10])
... out = linear(x)
... loss = paddle.mean(out)
... loss.backward()
... sgd.step()
... sgd.clear_gradients()
... scheduler.step() # If you update learning rate each step
... # scheduler.step() # If you update learning rate each epoch
...
"""
def __init__(self, learning_rate, lr_lambda, last_epoch=-1, verbose=False):
......@@ -1803,55 +1803,55 @@ class OneCycleLR(LRScheduler):
.. code-block:: python
:name: code-example1
# Example1: train on default dynamic graph mode
import paddle
import numpy as np
# train on default dynamic graph mode
linear = paddle.nn.Linear(10, 10)
scheduler = paddle.optimizer.lr.OneCycleLR(max_learning_rate=1.0, total_steps=100, verbose=True)
sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
for epoch in range(5):
for batch_id in range(20):
x = paddle.uniform([10, 10])
out = linear(x)
loss = paddle.mean(out)
loss.backward()
sgd.step()
sgd.clear_gradients()
scheduler.step() # You should update learning rate each step
>>> # Example1: train on default dynamic graph mode
>>> import paddle
>>> import numpy as np
>>> # train on default dynamic graph mode
>>> linear = paddle.nn.Linear(10, 10)
>>> scheduler = paddle.optimizer.lr.OneCycleLR(max_learning_rate=1.0, total_steps=100, verbose=True)
>>> sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
>>> for epoch in range(5):
... for batch_id in range(20):
... x = paddle.uniform([10, 10])
... out = linear(x)
... loss = paddle.mean(out)
... loss.backward()
... sgd.step()
... sgd.clear_gradients()
... scheduler.step() # You should update learning rate each step
.. code-block:: python
:name: code-example2
# Example2: train on static graph mode
import paddle
import numpy as np
paddle.enable_static()
main_prog = paddle.static.Program()
start_prog = paddle.static.Program()
with paddle.static.program_guard(main_prog, start_prog):
x = paddle.static.data(name='x', shape=[None, 4, 5])
y = paddle.static.data(name='y', shape=[None, 4, 5])
z = paddle.static.nn.fc(x, 100)
loss = paddle.mean(z)
scheduler = paddle.optimizer.lr.OneCycleLR(max_learning_rate=1.0, total_steps=100, verbose=True)
sgd = paddle.optimizer.SGD(learning_rate=scheduler)
sgd.minimize(loss)
exe = paddle.static.Executor()
exe.run(start_prog)
for epoch in range(5):
for batch_id in range(20):
out = exe.run(
main_prog,
feed={
'x': np.random.randn(3, 4, 5).astype('float32'),
'y': np.random.randn(3, 4, 5).astype('float32')
},
fetch_list=loss.name)
scheduler.step() # You should update learning rate each step
>>> # Example2: train on static graph mode
>>> import paddle
>>> import numpy as np
>>> paddle.enable_static()
>>> main_prog = paddle.static.Program()
>>> start_prog = paddle.static.Program()
>>> with paddle.static.program_guard(main_prog, start_prog):
... x = paddle.static.data(name='x', shape=[None, 4, 5])
... y = paddle.static.data(name='y', shape=[None, 4, 5])
... z = paddle.static.nn.fc(x, 100)
... loss = paddle.mean(z)
... scheduler = paddle.optimizer.lr.OneCycleLR(max_learning_rate=1.0, total_steps=100, verbose=True)
... sgd = paddle.optimizer.SGD(learning_rate=scheduler)
... sgd.minimize(loss)
...
>>> exe = paddle.static.Executor()
>>> exe.run(start_prog)
>>> for epoch in range(5):
... for batch_id in range(20):
... out = exe.run(
... main_prog,
... feed={
... 'x': np.random.randn(3, 4, 5).astype('float32'),
... 'y': np.random.randn(3, 4, 5).astype('float32')
... },
... fetch_list=loss.name)
... scheduler.step() # You should update learning rate each step
...
"""
def __init__(
......@@ -2053,55 +2053,55 @@ class CyclicLR(LRScheduler):
.. code-block:: python
:name: code-example1
# Example1: train on default dynamic graph mode
import paddle
import numpy as np
# train on default dynamic graph mode
linear = paddle.nn.Linear(10, 10)
scheduler = paddle.optimizer.lr.CyclicLR(base_learning_rate=0.5, max_learning_rate=1.0, step_size_up=15, step_size_down=5, verbose=True)
sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
for epoch in range(5):
for batch_id in range(20):
x = paddle.uniform([10, 10])
out = linear(x)
loss = paddle.mean(out)
loss.backward()
sgd.step()
sgd.clear_gradients()
scheduler.step() # You should update learning rate each step
>>> # Example1: train on default dynamic graph mode
>>> import paddle
>>> import numpy as np
>>> # train on default dynamic graph mode
>>> linear = paddle.nn.Linear(10, 10)
>>> scheduler = paddle.optimizer.lr.CyclicLR(base_learning_rate=0.5, max_learning_rate=1.0, step_size_up=15, step_size_down=5, verbose=True)
>>> sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
>>> for epoch in range(5):
... for batch_id in range(20):
... x = paddle.uniform([10, 10])
... out = linear(x)
... loss = paddle.mean(out)
... loss.backward()
... sgd.step()
... sgd.clear_gradients()
... scheduler.step() # You should update learning rate each step
.. code-block:: python
:name: code-example2
# Example2: train on static graph mode
import paddle
import numpy as np
paddle.enable_static()
main_prog = paddle.static.Program()
start_prog = paddle.static.Program()
with paddle.static.program_guard(main_prog, start_prog):
x = paddle.static.data(name='x', shape=[None, 4, 5])
y = paddle.static.data(name='y', shape=[None, 4, 5])
z = paddle.static.nn.fc(x, 100)
loss = paddle.mean(z)
scheduler = paddle.optimizer.lr.CyclicLR(base_learning_rate=0.5,
max_learning_rate=1.0, step_size_up=15, step_size_down=5, verbose=True)
sgd = paddle.optimizer.SGD(learning_rate=scheduler)
sgd.minimize(loss)
exe = paddle.static.Executor()
exe.run(start_prog)
for epoch in range(5):
for batch_id in range(20):
out = exe.run(
main_prog,
feed={
'x': np.random.randn(3, 4, 5).astype('float32'),
'y': np.random.randn(3, 4, 5).astype('float32')
},
fetch_list=loss.name)
scheduler.step() # You should update learning rate each step
>>> # Example2: train on static graph mode
>>> import paddle
>>> import numpy as np
>>> paddle.enable_static()
>>> main_prog = paddle.static.Program()
>>> start_prog = paddle.static.Program()
>>> with paddle.static.program_guard(main_prog, start_prog):
... x = paddle.static.data(name='x', shape=[None, 4, 5])
... y = paddle.static.data(name='y', shape=[None, 4, 5])
... z = paddle.static.nn.fc(x, 100)
... loss = paddle.mean(z)
... scheduler = paddle.optimizer.lr.CyclicLR(base_learning_rate=0.5,
... max_learning_rate=1.0, step_size_up=15, step_size_down=5, verbose=True)
... sgd = paddle.optimizer.SGD(learning_rate=scheduler)
... sgd.minimize(loss)
...
>>> exe = paddle.static.Executor()
>>> exe.run(start_prog)
>>> for epoch in range(5):
... for batch_id in range(20):
... out = exe.run(
... main_prog,
... feed={
... 'x': np.random.randn(3, 4, 5).astype('float32'),
... 'y': np.random.randn(3, 4, 5).astype('float32')
... },
... fetch_list=loss.name)
... scheduler.step() # You should update learning rate each step
"""
def __init__(
......@@ -2256,10 +2256,10 @@ def autoincreased_step_counter(counter_name=None, begin=1, step=1):
Examples:
.. code-block:: python
import paddle
paddle.enable_static()
global_step = paddle.optimizer.lr.autoincreased_step_counter(
counter_name='@LR_DECAY_COUNTER@', begin=0, step=1)
>>> import paddle
>>> paddle.enable_static()
>>> global_step = paddle.optimizer.lr.autoincreased_step_counter(
... counter_name='@LR_DECAY_COUNTER@', begin=0, step=1)
"""
helper = LayerHelper('global_step_counter')
if counter_name is None:
......@@ -2305,42 +2305,39 @@ def noam_decay(d_model, warmup_steps, learning_rate=1.0):
.. code-block:: python
import paddle.fluid as fluid
import numpy as np
# set hyper parameters
base_lr = 0.01
d_model = 2
current_steps = 20
warmup_steps = 200
# compute
lr_value = base_lr * np.power(d_model, -0.5) * np.min([
np.power(current_steps, -0.5),
np.power(warmup_steps, -1.5) * current_steps])
Please reference `attention is all you need
<https://arxiv.org/pdf/1706.03762.pdf>`_.
>>> import numpy as np
>>> # set hyper parameters
>>> base_lr = 0.01
>>> d_model = 2
>>> current_steps = 20
>>> warmup_steps = 200
>>> # compute
>>> lr_value = base_lr * np.power(d_model, -0.5) * np.min([
... np.power(current_steps, -0.5),
... np.power(warmup_steps, -1.5) * current_steps])
Please reference `attention is all you need <https://arxiv.org/pdf/1706.03762.pdf>`_.
Args:
d_model(Variable): The dimensionality of input and output of model.
warmup_steps(Variable): A super parameter.
learning_rate(Variable|float|int): The initial learning rate. If the type
is Variable, it's a tensor with shape [1], the data type can be
float32 or float64. It also can be set to python int number. Default 1.0
Returns:
The decayed learning rate.
Examples:
.. code-block:: python
import paddle
warmup_steps = 100
learning_rate = 0.01
lr = paddle.optimizer.lr.noam_decay(
1/(warmup_steps *(learning_rate ** 2)),
warmup_steps,
learning_rate)
>>> import paddle
>>> warmup_steps = 100
>>> learning_rate = 0.01
>>> lr = paddle.optimizer.lr.noam_decay(
... 1/(warmup_steps *(learning_rate ** 2)),
... warmup_steps,
... learning_rate)
"""
with default_main_program()._lr_schedule_guard():
if in_dygraph_mode():
......@@ -2369,6 +2366,8 @@ def exponential_decay(learning_rate, decay_steps, decay_rate, staircase=False):
Decayed learning rate calculates as follows:
.. code-block:: text
>>> if staircase == True:
>>> decayed_learning_rate = learning_rate * decay_rate ^ floor(global_step / decay_steps)
>>> else:
......@@ -2390,17 +2389,16 @@ def exponential_decay(learning_rate, decay_steps, decay_rate, staircase=False):
Examples:
.. code-block:: python
import paddle
paddle.enable_static()
base_lr = 0.1
sgd_optimizer = fluid.optimizer.SGD(
learning_rate=paddle.optimizer.lr.exponential_decay(
learning_rate=base_lr,
decay_steps=10000,
decay_rate=0.5,
staircase=True))
>>> import paddle
>>> paddle.enable_static()
>>> base_lr = 0.1
>>> lr = paddle.optimizer.lr.exponential_decay(
... learning_rate=base_lr,
... decay_steps=10000,
... decay_rate=0.5,
... staircase=True
... )
"""
with default_main_program()._lr_schedule_guard():
if in_dygraph_mode():
......@@ -2428,6 +2426,8 @@ def natural_exp_decay(learning_rate, decay_steps, decay_rate, staircase=False):
Decayed learning rate calculates as follows:
.. code-block:: text
>>> if not staircase:
>>> decayed_learning_rate = learning_rate * exp(- decay_rate * (global_step / decay_steps))
>>> else:
......@@ -2449,18 +2449,16 @@ def natural_exp_decay(learning_rate, decay_steps, decay_rate, staircase=False):
Examples:
.. code-block:: python
import paddle.fluid as fluid
import paddle
paddle.enable_static()
base_lr = 0.1
sgd_optimizer = fluid.optimizer.SGD(
learning_rate=paddle.optimizer.lr.natural_exp_decay(
learning_rate=base_lr,
decay_steps=10000,
decay_rate=0.5,
staircase=True))
>>> import paddle
>>> paddle.enable_static()
>>> base_lr = 0.1
>>> lr = paddle.optimizer.lr.natural_exp_decay(
... learning_rate=base_lr,
... decay_steps=10000,
... decay_rate=0.5,
... staircase=True
... )
"""
with default_main_program()._lr_schedule_guard():
if in_dygraph_mode():
......@@ -2479,7 +2477,6 @@ def natural_exp_decay(learning_rate, decay_steps, decay_rate, staircase=False):
def inverse_time_decay(learning_rate, decay_steps, decay_rate, staircase=False):
"""
Applies inverse time decay to the initial learning rate.
When training a model, it is often recommended to lower the learning rate as the
......@@ -2488,6 +2485,8 @@ def inverse_time_decay(learning_rate, decay_steps, decay_rate, staircase=False):
Decayed learning rate calculates as follows:
.. code-block:: text
>>> if staircase == True:
>>> decayed_learning_rate = learning_rate / (1 + decay_rate * floor(global_step / decay_step))
>>> else:
......@@ -2509,16 +2508,15 @@ def inverse_time_decay(learning_rate, decay_steps, decay_rate, staircase=False):
Examples:
.. code-block:: python
import paddle.fluid as fluid
import paddle
paddle.enable_static()
base_lr = 0.1
sgd_optimizer = fluid.optimizer.SGD(
learning_rate=paddle.optimizer.lr.inverse_time_decay(
learning_rate=base_lr,
decay_steps=10000,
decay_rate=0.5,
staircase=True))
>>> import paddle
>>> paddle.enable_static()
>>> base_lr = 0.1
>>> lr = paddle.optimizer.lr.inverse_time_decay(
... learning_rate=base_lr,
... decay_steps=10000,
... decay_rate=0.5,
... staircase=True
... )
"""
with default_main_program()._lr_schedule_guard():
if in_dygraph_mode():
......@@ -2565,13 +2563,16 @@ def polynomial_decay(
Examples:
.. code-block:: python
import paddle
start_lr = 0.01
total_step = 5000
end_lr = 0
lr = paddle.optimizer.lr.polynomial_decay(
start_lr, total_step, end_lr, power=1)
>>> import paddle
>>> start_lr = 0.01
>>> total_step = 5000
>>> end_lr = 0
>>> lr = paddle.optimizer.lr.polynomial_decay(
... start_lr,
... total_step,
... end_lr,
... power=1
... )
"""
with default_main_program()._lr_schedule_guard():
if in_dygraph_mode():
......@@ -2611,7 +2612,6 @@ def polynomial_decay(
def piecewise_decay(boundaries, values):
"""
Applies piecewise decay to the initial learning rate.
The algorithm can be described as the code below.
......@@ -2626,6 +2626,7 @@ def piecewise_decay(boundaries, values):
learning_rate = 0.5
else:
learning_rate = 0.1
Args:
boundaries: A list of steps numbers.
values: A list of learning rate values that will be picked during
......@@ -2637,16 +2638,15 @@ def piecewise_decay(boundaries, values):
Examples:
.. code-block:: python
import paddle
paddle.enable_static()
boundaries = [10000, 20000]
values = [1.0, 0.5, 0.1]
optimizer = paddle.optimizer.Momentum(
momentum=0.9,
learning_rate=paddle.optimizer.lr.PiecewiseDecay(boundaries, values),
weight_decay=paddle.regularizer.L2Decay(1e-4))
>>> import paddle
>>> paddle.enable_static()
>>> boundaries = [10000, 20000]
>>> values = [1.0, 0.5, 0.1]
>>> optimizer = paddle.optimizer.Momentum(
... momentum=0.9,
... learning_rate=paddle.optimizer.lr.PiecewiseDecay(boundaries, values),
... weight_decay=paddle.regularizer.L2Decay(1e-4)
... )
"""
with default_main_program()._lr_schedule_guard():
if len(values) - len(boundaries) != 1:
......@@ -2714,10 +2714,10 @@ def cosine_decay(learning_rate, step_each_epoch, epochs):
Examples:
.. code-block:: python
import paddle
base_lr = 0.1
lr = paddle.optimizer.lr.cosine_decay(
learning_rate = base_lr, step_each_epoch=10000, epochs=120)
>>> import paddle
>>> base_lr = 0.1
>>> lr = paddle.optimizer.lr.cosine_decay(
... learning_rate=base_lr, step_each_epoch=10000, epochs=120)
"""
check_type(
learning_rate, 'learning_rate', (float, Variable), 'cosine_decay'
......@@ -2771,30 +2771,31 @@ def linear_lr_warmup(learning_rate, warmup_steps, start_lr, end_lr):
Returns:
Variable: Warm-up learning rate with the same data type as learning_rate.
Examples:
.. code-block:: python
import paddle.fluid as fluid
boundaries = [100, 200]
lr_steps = [0.1, 0.01, 0.001]
learning_rate = fluid.layers.piecewise_decay(boundaries, lr_steps) #case1, 1D-Tensor
#learning_rate = 0.1 #case2, single-value
warmup_steps = 50
start_lr = 1. / 3.
end_lr = 0.1
decayed_lr = fluid.layers.linear_lr_warmup(learning_rate,
warmup_steps, start_lr, end_lr)
place = fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
out, = exe.run(fetch_list=[decayed_lr.name])
print(out)
# case1: [0.33333334]
# case2: [0.33333334]
>>> import paddle
>>> paddle.enable_static()
>>> boundaries = [100, 200]
>>> lr_steps = [0.1, 0.01, 0.001]
>>> learning_rate = paddle.optimizer.lr.piecewise_decay(boundaries, lr_steps) # case1, 1D-Tensor
>>> # learning_rate = 0.1 # case2, single-value
>>> warmup_steps = 50
>>> start_lr = 0.1
>>> end_lr = 1. / 3.
>>> decayed_lr = paddle.optimizer.lr.linear_lr_warmup(
... learning_rate,
... warmup_steps,
... start_lr,
... end_lr
... )
>>> place = paddle.CPUPlace()
>>> exe = paddle.static.Executor(place)
>>> exe.run(paddle.static.default_startup_program())
>>> out, = exe.run(fetch_list=[decayed_lr.name])
>>> print(out)
[0.1]
"""
dtype = 'float32'
if isinstance(learning_rate, Variable):
......