Unverified commit 63f242b6, authored by LoneRanger, committed by GitHub

replace PiecewiseDecay, StepDecay, MultiStepDecay, LambdaDecay with 2.0 version (#53992)

* replace PiecewiseDecay(LearningRateDecay) with PiecewiseDecay(LRScheduler)

* fix bug

* fix bug

* replace the StepDecay,MultiStepDecay,LambdaDecay with 2.0 version
Parent 54b86fd4
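For context, a minimal sketch of the 2.0-style usage this change migrates to. The layer, numbers, and training loop below are illustrative only and not taken from the diff; note that the `paddle.optimizer.lr` schedulers take `gamma` where the removed classes took `decay_rate`, and are advanced with `scheduler.step()` rather than `scheduler.epoch()`.

```python
import paddle

# Illustrative model and schedule; the commented-out schedulers can be
# swapped in the same way (values are placeholders, not from the diff).
linear = paddle.nn.Linear(10, 10)
scheduler = paddle.optimizer.lr.PiecewiseDecay(
    boundaries=[3, 6, 9], values=[0.1, 0.05, 0.01, 0.001]
)
# scheduler = paddle.optimizer.lr.StepDecay(learning_rate=0.5, step_size=3, gamma=0.1)
# scheduler = paddle.optimizer.lr.MultiStepDecay(learning_rate=0.5, milestones=[3, 5], gamma=0.1)
# scheduler = paddle.optimizer.lr.LambdaDecay(learning_rate=0.5, lr_lambda=lambda e: 0.95**e)

adam = paddle.optimizer.Adam(
    learning_rate=scheduler, parameters=linear.parameters()
)

for epoch in range(9):
    x = paddle.uniform([10, 10], min=-1.0, max=1.0)
    loss = paddle.mean(linear(x))
    loss.backward()
    adam.step()
    adam.clear_grad()
    scheduler.step()  # advance the epoch-based schedule once per epoch
    print(f"epoch:{epoch}, lr={adam.get_lr()}")
```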
@@ -23,13 +23,9 @@ from ..data_feeder import check_type
 __all__ = [
     'NoamDecay',
-    'PiecewiseDecay',
     'PolynomialDecay',
     'LinearLrWarmup',
     'ReduceLROnPlateau',
-    'StepDecay',
-    'MultiStepDecay',
-    'LambdaDecay',
 ]
@@ -131,68 +127,6 @@ class LearningRateDecay:
         raise NotImplementedError()
 
 
-class PiecewiseDecay(LearningRateDecay):
-    """
-    :api_attr: imperative
-
-    Piecewise decay scheduler.
-
-    The algorithm can be described as the code below.
-
-    .. code-block:: text
-
-        boundaries = [10000, 20000]
-        values = [1.0, 0.5, 0.1]
-        if global_step < 10000:
-            learning_rate = 1.0
-        elif 10000 <= global_step < 20000:
-            learning_rate = 0.5
-        else:
-            learning_rate = 0.1
-
-    Parameters:
-        boundaries(list): A list of steps numbers. The type of element in the list is python int.
-        values(list): A list of learning rate values that will be picked during
-            different step boundaries. The type of element in the list is python float.
-        begin(int): The begin step to initialize the global_step in the description above.
-        step(int, optional): The step size used to calculate the new global_step in the description above.
-            The default value is 1.
-        dtype(str, optional): The data type used to create the learning rate variable. The data type can be set as
-            'float32', 'float64'. The default value is 'float32'.
-
-    Returns:
-        None.
-
-    Examples:
-        .. code-block:: python
-
-            import paddle.fluid as fluid
-            import paddle
-            boundaries = [10000, 20000]
-            values = [1.0, 0.5, 0.1]
-            with fluid.dygraph.guard():
-                emb = paddle.nn.Embedding(10, 10)
-                optimizer = fluid.optimizer.SGD(
-                    learning_rate=fluid.dygraph.PiecewiseDecay(boundaries, values, 0),
-                    parameter_list = emb.parameters() )
-    """
-
-    def __init__(self, boundaries, values, begin, step=1, dtype='float32'):
-        super().__init__(begin, step, dtype)
-        self.boundaries = boundaries
-        self.values = values
-
-        self.vars = []
-        for value in values:
-            self.vars.append(value)
-
-    def step(self):
-        for i in range(len(self.boundaries)):
-            if self.step_num < self.boundaries[i]:
-                return self.vars[i]
-        return self.create_lr_var(self.vars[len(self.values) - 1])
-
-
 class PolynomialDecay(LearningRateDecay):
     r"""
     :api_attr: imperative
@@ -742,241 +676,3 @@ class _LearningRateEpochDecay(LearningRateDecay):
     def get_lr(self):
         raise NotImplementedError
-
-
-class StepDecay(_LearningRateEpochDecay):
-    """
-    :api_attr: imperative
-
-    Decays the learning rate of ``optimizer`` by ``decay_rate`` every ``step_size`` number of epoch.
-
-    The algorithm can be described as the code below.
-
-    .. code-block:: text
-
-        learning_rate = 0.5
-        step_size = 30
-        decay_rate = 0.1
-
-        learning_rate = 0.5   if epoch < 30
-        learning_rate = 0.05  if 30 <= epoch < 60
-        learning_rate = 0.005 if 60 <= epoch < 90
-        ...
-
-    Parameters:
-        learning_rate (float|int): The initial learning rate. It can be set to python float or int number.
-        step_size (int): Period of learning rate decay.
-        decay_rate (float, optional): The Ratio that the learning rate will be reduced. ``new_lr = origin_lr * decay_rate`` .
-            It should be less than 1.0. Default: 0.1.
-
-    Returns:
-        None.
-
-    Examples:
-        .. code-block:: python
-
-            import paddle.fluid as fluid
-            import numpy as np
-            import paddle
-            with fluid.dygraph.guard():
-                x = np.random.uniform(-1, 1, [10, 10]).astype("float32")
-                linear = paddle.nn.Linear(10, 10)
-                input = fluid.dygraph.to_variable(x)
-                scheduler = fluid.dygraph.StepDecay(0.5, step_size=3)
-                adam = fluid.optimizer.Adam(learning_rate = scheduler, parameter_list = linear.parameters())
-
-                for epoch in range(9):
-                    for batch_id in range(5):
-                        out = linear(input)
-                        loss = paddle.mean(out)
-                        adam.minimize(loss)
-                    scheduler.epoch()
-
-                    print("epoch:{}, current lr is {}" .format(epoch, adam.current_step_lr()))
-                    # epoch:0, current lr is 0.5
-                    # epoch:1, current lr is 0.5
-                    # epoch:2, current lr is 0.5
-                    # epoch:3, current lr is 0.05
-                    # epoch:4, current lr is 0.05
-                    # epoch:5, current lr is 0.05
-                    # epoch:6, current lr is 0.005
-                    # epoch:7, current lr is 0.005
-                    # epoch:8, current lr is 0.005
-    """
-
-    def __init__(self, learning_rate, step_size, decay_rate=0.1):
-        if not isinstance(step_size, int):
-            raise TypeError(
-                "The type of 'step_size' must be 'int', but received %s."
-                % type(step_size)
-            )
-        if decay_rate >= 1.0:
-            raise ValueError('decay_rate should be < 1.0.')
-
-        self.step_size = step_size
-        self.decay_rate = decay_rate
-        super().__init__(learning_rate)
-
-    def get_lr(self):
-        decay_rate = self.create_lr_var(self.decay_rate)
-        i = self.epoch_num // self.step_size
-        return self.base_lr * (decay_rate**i)
-
-
-class MultiStepDecay(_LearningRateEpochDecay):
-    """
-    :api_attr: imperative
-
-    Decays the learning rate of ``optimizer`` by ``decay_rate`` once ``epoch`` reaches one of the milestones.
-
-    The algorithm can be described as the code below.
-
-    .. code-block:: text
-
-        learning_rate = 0.5
-        milestones = [30, 50]
-        decay_rate = 0.1
-        if epoch < 30:
-            learning_rate = 0.5
-        elif epoch < 50:
-            learning_rate = 0.05
-        else:
-            learning_rate = 0.005
-
-    Parameters:
-        learning_rate (float|int): The initial learning rate. It can be set to python float or int number.
-        milestones (tuple|list): List or tuple of each boundaries. Must be increasing.
-        decay_rate (float, optional): The Ratio that the learning rate will be reduced. ``new_lr = origin_lr * decay_rate`` .
-            It should be less than 1.0. Default: 0.1.
-
-    Returns:
-        None.
-
-    Examples:
-        .. code-block:: python
-
-            import paddle.fluid as fluid
-            import numpy as np
-            import paddle
-            with fluid.dygraph.guard():
-                x = np.random.uniform(-1, 1, [10, 10]).astype("float32")
-                linear = paddle.nn.Linear(10, 10)
-                input = fluid.dygraph.to_variable(x)
-                scheduler = fluid.dygraph.MultiStepDecay(0.5, milestones=[3, 5])
-                adam = fluid.optimizer.Adam(learning_rate = scheduler, parameter_list = linear.parameters())
-
-                for epoch in range(6):
-                    for batch_id in range(5):
-                        out = linear(input)
-                        loss = paddle.mean(out)
-                        adam.minimize(loss)
-                    scheduler.epoch()
-
-                    print("epoch:{}, current lr is {}" .format(epoch, adam.current_step_lr()))
-                    # epoch:0, current lr is 0.5
-                    # epoch:1, current lr is 0.5
-                    # epoch:2, current lr is 0.5
-                    # epoch:3, current lr is 0.05
-                    # epoch:4, current lr is 0.05
-                    # epoch:5, current lr is 0.005
-    """
-
-    def __init__(self, learning_rate, milestones, decay_rate=0.1):
-        if not isinstance(milestones, (tuple, list)):
-            raise TypeError(
-                "The type of 'milestones' in 'MultiStepDecay' must be 'tuple, list', but received %s."
-                % type(milestones)
-            )
-
-        if not all(
-            [
-                milestones[i] < milestones[i + 1]
-                for i in range(len(milestones) - 1)
-            ]
-        ):
-            raise ValueError('The elements of milestones must be incremented')
-        if decay_rate >= 1.0:
-            raise ValueError('decay_rate should be < 1.0.')
-
-        self.milestones = milestones
-        self.decay_rate = decay_rate
-        super().__init__(learning_rate)
-
-    def get_lr(self):
-        decay_rate = self.create_lr_var(self.decay_rate)
-        for i in range(len(self.milestones)):
-            if self.epoch_num < self.milestones[i]:
-                return self.base_lr * (decay_rate**i)
-
-        return self.base_lr * (decay_rate ** len(self.milestones))
-
-
-class LambdaDecay(_LearningRateEpochDecay):
-    """
-    :api_attr: imperative
-
-    Sets the learning rate of ``optimizer`` to the initial lr times a multiplicative factor, and this multiplicative
-    factor is computed by function ``lr_lambda`` . ``lr_lambda`` is function which receives ``epoch`` .
-
-    The algorithm can be described as the code below.
-
-    .. code-block:: text
-
-        learning_rate = 0.5        # init learning_rate
-        lr_lambda = lambda epoch: 0.95 ** epoch
-
-        learning_rate = 0.5        # epoch 0
-        learning_rate = 0.475      # epoch 1
-        learning_rate = 0.45125    # epoch 2
-
-    Parameters:
-        learning_rate (float|int): The initial learning rate. It can be set to python float or int number.
-        lr_lambda (function): A function which computes a multiplicative factor given an integer parameter ``epoch`` , and
-            then multiply the initial learning rate by this multiplicative factor.
-
-    Returns:
-        None.
-
-    Examples:
-        .. code-block:: python
-
-            import paddle.fluid as fluid
-            import numpy as np
-            import paddle
-            with fluid.dygraph.guard():
-                x = np.random.uniform(-1, 1, [10, 10]).astype("float32")
-                linear = paddle.nn.Linear(10, 10)
-                input = fluid.dygraph.to_variable(x)
-                scheduler = fluid.dygraph.LambdaDecay(0.5, lr_lambda=lambda x: 0.95**x)
-                adam = fluid.optimizer.Adam(learning_rate = scheduler, parameter_list = linear.parameters())
-
-                for epoch in range(6):
-                    for batch_id in range(5):
-                        out = linear(input)
-                        loss = paddle.mean(out)
-                        adam.minimize(loss)
-                    scheduler.epoch()
-
-                    print("epoch:%d, current lr is %f" .format(epoch, adam.current_step_lr()))
-                    # epoch:0, current lr is 0.5
-                    # epoch:1, current lr is 0.475
-                    # epoch:2, current lr is 0.45125
-    """
-
-    def __init__(self, learning_rate, lr_lambda):
-        if not callable(lr_lambda):
-            raise TypeError(
-                "The type of 'lr_lambda' in 'LambdaDecay' must be 'function', but received %s."
-                % type(lr_lambda)
-            )
-
-        self.lr_lambda = lr_lambda
-        super().__init__(learning_rate)
-
-    def get_lr(self):
-        base_lr = self.create_lr_var(self.base_lr)
-
-        return self.base_lr * self.lr_lambda(self.epoch_num)
@@ -410,10 +410,10 @@ def piecewise_decay(boundaries, values):
             paddle.enable_static()
             boundaries = [10000, 20000]
             values = [1.0, 0.5, 0.1]
-            optimizer = fluid.optimizer.Momentum(
+            optimizer = paddle.optimizer.Momentum(
                 momentum=0.9,
-                learning_rate=fluid.layers.piecewise_decay(boundaries=boundaries, values=values),
-                regularization=paddle.regularizer.L2Decay(1e-4))
+                learning_rate=paddle.optimizer.lr.PiecewiseDecay(boundaries, values),
+                weight_decay=paddle.regularizer.L2Decay(1e-4))
    """
@@ -422,7 +422,7 @@ def piecewise_decay(boundaries, values):
         raise ValueError("len(values) - len(boundaries) should be 1")
 
     if in_dygraph_mode():
-        decay = imperate_lr.PiecewiseDecay(boundaries, values, 0)
+        decay = paddle.optimizer.lr.PiecewiseDecay(boundaries, values)
         return decay
     else:
         global_step = _decay_step_counter()
@@ -98,6 +98,8 @@ class LRScheduler:
                     type(learning_rate)
                 )
             )
+        if learning_rate < 0:
+            raise ValueError(f"Invalid learning rate: {learning_rate}")
         self.base_lr = float(learning_rate)
         self.last_lr = float(learning_rate)
         self.last_epoch = last_epoch
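The check added to `LRScheduler.__init__` above makes an invalid setting fail at construction time; a minimal sketch with a hypothetical negative value:

```python
import paddle

# After this change, a negative base learning rate is rejected immediately
# by LRScheduler.__init__ (the -0.1 here is just an illustrative bad value).
try:
    paddle.optimizer.lr.StepDecay(learning_rate=-0.1, step_size=3)
except ValueError as err:
    print(err)  # Invalid learning rate: -0.1
```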
@@ -376,9 +376,9 @@ def dyfunc_NoamDecay():
 def dyfunc_PiecewiseDecay():
     boundaries = [10000, 20000]
     values = [1.0, 0.5, 0.1]
-    pd = fluid.dygraph.PiecewiseDecay(boundaries, values, begin=0)
+    pd = paddle.optimizer.lr.PiecewiseDecay(boundaries, values)
     lr = pd()
-    return lr
+    return paddle.to_tensor(lr)
 
 
 def dyfunc_PolynomialDecay():
@@ -94,11 +94,11 @@ def train(to_static):
     learning_rate = cfg.learning_rate
     values = [learning_rate * (gamma**i) for i in range(step_num + 1)]
 
-    lr = fluid.dygraph.PiecewiseDecay(
-        boundaries=boundaries, values=values, begin=0
+    lr = paddle.optimizer.lr.PiecewiseDecay(
+        boundaries=boundaries, values=values
     )
-    lr = fluid.layers.linear_lr_warmup(
+    lr = paddle.optimizer.lr.LinearWarmup(
         learning_rate=lr,
         warmup_steps=cfg.warm_up_iter,
         start_lr=0.0,
@@ -262,7 +262,7 @@ class TestImperativeOptimizerPiecewiseDecay(TestImperativeOptimizerBase):
     def get_optimizer(self):
         bd = [3, 6, 9]
         optimizer = SGDOptimizer(
-            learning_rate=fluid.layers.piecewise_decay(
+            learning_rate=paddle.optimizer.lr.PiecewiseDecay(
                 boundaries=bd,
                 values=[0.1 * (0.1**i) for i in range(len(bd) + 1)],
             )
@@ -470,20 +470,20 @@ class TestOptimizerLearningRate(unittest.TestCase):
             bd = [2, 4, 6, 8]
             value = [0.2, 0.4, 0.6, 0.8, 1.0]
 
-            adam = fluid.optimizer.Adam(
-                fluid.dygraph.PiecewiseDecay(bd, value, 0),
-                parameter_list=linear.parameters(),
+            scheduler = paddle.optimizer.lr.PiecewiseDecay(bd, value)
+            adam = paddle.optimizer.Adam(
+                scheduler,
+                parameters=linear.parameters(),
             )
 
-            np.testing.assert_allclose(
-                adam.current_step_lr(), 0.2, rtol=1e-06, atol=0.0
-            )
+            np.testing.assert_allclose(adam.get_lr(), 0.2, rtol=1e-06, atol=0.0)
 
             ret = [0.2, 0.2, 0.4, 0.4, 0.6, 0.6, 0.8, 0.8, 1.0, 1.0, 1.0, 1.0]
             for i in range(12):
                 adam.minimize(loss)
-                lr = adam.current_step_lr()
+                lr = adam.get_lr()
+                adam.step()
+                scheduler.step()
                 np.testing.assert_allclose(lr, ret[i], rtol=1e-06, atol=0.0)
 
     def test_lr_decay_natural_exp(self):
@@ -127,7 +127,7 @@ class TestLearningRateDecayDygraph(unittest.TestCase):
                 learning_rate=0.1,
                 gamma=0.5,
             )
-            Step_scheduler = fluid.dygraph.StepDecay(0.5, step_size=3)
+            Step_scheduler = paddle.optimizer.lr.StepDecay(0.5, step_size=3)
             Reducelr_scheduler = fluid.dygraph.ReduceLROnPlateau(
                 learning_rate=1.0, decay_rate=0.5, patience=5, cooldown=3
             )
@@ -154,7 +154,7 @@ class TestLearningRateDecayDygraph(unittest.TestCase):
                 adam3.minimize(loss)
                 linear.clear_gradients()
 
-                Step_scheduler.epoch()
+                Step_scheduler.get_lr()
                 Reducelr_scheduler.step(loss)
 
             paddle.save(linear.state_dict(), "save_path.pdparams")
@@ -163,7 +163,9 @@ class TestLearningRateDecayDygraph(unittest.TestCase):
                 learning_rate=0.1,
                 gamma=0.5,
            )
-            Step_scheduler_test = fluid.dygraph.StepDecay(0.5, step_size=3)
+            Step_scheduler_test = paddle.optimizer.lr.StepDecay(
+                0.5, step_size=3
+            )
             Reducelr_scheduler_test = fluid.dygraph.ReduceLROnPlateau(
                 learning_rate=1.0, decay_rate=0.5, patience=5, cooldown=3
            )
@@ -189,8 +191,8 @@ class TestLearningRateDecayDygraph(unittest.TestCase):
            )
            adam_test.set_dict(opt_state)
            self.assertEqual(
-                adam_test._learning_rate.epoch_num,
-                adam2._learning_rate.epoch_num,
+                adam_test._learning_rate.last_epoch,
+                adam2._learning_rate.last_epoch,
                "epoch_num is different before and after set_dict",
            )
            self.assertEqual(
@@ -288,19 +290,20 @@ class TestLearningRateDecayDygraph(unittest.TestCase):
             decay_rate = 0.2
             linear = paddle.nn.Linear(10, 10)
 
-            scheduler = fluid.dygraph.MultiStepDecay(
+            scheduler = paddle.optimizer.lr.MultiStepDecay(
                 learning_rate, milestones, decay_rate
             )
-            adam = fluid.optimizer.AdamOptimizer(
-                learning_rate=scheduler, parameter_list=linear.parameters()
+            adam = paddle.optimizer.Adam(
+                learning_rate=scheduler, parameters=linear.parameters()
             )
             for epoch in range(10):
                 right_result = multi_step_decay(
                     epoch, learning_rate, milestones, decay_rate
                 )
-                fluid_result = adam.current_step_lr()
-                scheduler.epoch()
+                fluid_result = adam.get_lr()
+                adam.step()
+                scheduler.step()
                 self.assertAlmostEqual(
                     right_result,
                     fluid_result,
@@ -310,35 +313,36 @@ class TestLearningRateDecayDygraph(unittest.TestCase):
                )
 
            with self.assertRaises(ValueError):
-                lr = fluid.dygraph.MultiStepDecay(
+                lr = paddle.optimizer.lr.MultiStepDecay(
                    learning_rate, [30, 50, 20], 0.1
                )
            with self.assertRaises(ValueError):
-                lr = fluid.dygraph.MultiStepDecay(
+                lr = paddle.optimizer.lr.MultiStepDecay(
                    learning_rate, [20, 30, 50], 1
                )
            with self.assertRaises(TypeError):
-                lr = fluid.dygraph.MultiStepDecay("test", [20, 30, 50])
+                lr = paddle.optimizer.lr.MultiStepDecay("test", [20, 30, 50])
            with self.assertRaises(ValueError):
-                lr = fluid.dygraph.MultiStepDecay(-1, [20, 30, 50])
+                lr = paddle.optimizer.lr.MultiStepDecay(-1, [20, 30, 50])
 
    def test_StepDecay(self):
        with fluid.dygraph.guard():
            learning_rate = 0.5
            step_size = 3
            decay_rate = 0.2
-            scheduler = fluid.dygraph.StepDecay(
+            scheduler = paddle.optimizer.lr.StepDecay(
                learning_rate, step_size, decay_rate
            )
            for epoch in range(10):
                right_result = step_decay(
                    epoch, learning_rate, step_size, decay_rate
                )
-                fluid_result = scheduler().numpy().item()
-                scheduler.epoch()
+                fluid_result = scheduler()
+                scheduler.get_lr()
+                scheduler.step()
                self.assertAlmostEqual(
                    right_result,
                    fluid_result,
@@ -348,16 +352,18 @@ class TestLearningRateDecayDygraph(unittest.TestCase):
                )
 
            with self.assertRaises(TypeError):
-                lr = fluid.dygraph.StepDecay(learning_rate, "test", 0.1)
+                lr = paddle.optimizer.lr.StepDecay(learning_rate, "test", 0.1)
            with self.assertRaises(ValueError):
-                lr = fluid.dygraph.StepDecay(learning_rate, 20, 2)
+                lr = paddle.optimizer.lr.StepDecay(learning_rate, 20, 2)
 
    def test_LambdaDecay(self):
        with fluid.dygraph.guard():
            learning_rate = 0.5
            lr_lambda = lambda x: 0.95**x
-            scheduler = fluid.dygraph.LambdaDecay(learning_rate, lr_lambda)
+            scheduler = paddle.optimizer.lr.LambdaDecay(
+                learning_rate, lr_lambda
+            )
 
            linear = paddle.nn.Linear(10, 10)
            adam = fluid.optimizer.Adam(
@@ -366,8 +372,9 @@ class TestLearningRateDecayDygraph(unittest.TestCase):
            for epoch in range(30):
                right_result = lambda_decay(epoch, learning_rate, lr_lambda)
-                fluid_result = scheduler().numpy().item()
-                scheduler.epoch()
+                fluid_result = scheduler()
+                scheduler.get_lr()
+                scheduler.step()
                self.assertAlmostEqual(
                    right_result,
                    fluid_result,
@@ -377,7 +384,7 @@ class TestLearningRateDecayDygraph(unittest.TestCase):
                )
 
            with self.assertRaises(TypeError):
-                lr = fluid.dygraph.LambdaDecay(learning_rate, "test")
+                lr = paddle.optimizer.lr.LambdaDecay(learning_rate, "test")
 
 
 class TestLearningRateDecay(unittest.TestCase):