未验证 提交 de60c1d1 编写于 作者: L LoneRanger 提交者: GitHub

replace NaturalExpDecay, ExponentialDecay, InverseTimeDecay with 2.0 version (#54424)

* remove the NaturalExpDecay in fluid

* fix bug

* remove the ExponentialDecay in fluid

* remove the InverseTimeDecay in fluid

* remove the InverseTimeDecay class

* fix bug
上级 5bbbf5dd
......@@ -18,7 +18,6 @@ import paddle
from paddle.fluid.layers.learning_rate_scheduler import (
exponential_decay,
inverse_time_decay,
natural_exp_decay,
noam_decay,
)
from paddle.optimizer.lr import (
......@@ -112,9 +111,9 @@ class AddLrDecayTablePass(PassBase):
with paddle.static.program_guard(
decay_main_program, decay_startup_program
):
lr = natural_exp_decay(
1.0, lr_decay_steps, lr_scheduler.gamma, True
)
lr = paddle.optimizer.lr.NaturalExpDecay(
1.0, lr_scheduler.gamma
).get_lr()
lr_name = lr.name
logging.warn(
"NaturalExpDecay is set, staircase = True, global learning rate decay step is [ %d ], Change decay steps as follow: \n"
......
......@@ -24,9 +24,6 @@ from ..data_feeder import check_type
__all__ = [
'NoamDecay',
'PiecewiseDecay',
'NaturalExpDecay',
'ExponentialDecay',
'InverseTimeDecay',
'PolynomialDecay',
'CosineDecay',
'LinearLrWarmup',
......@@ -197,255 +194,6 @@ class PiecewiseDecay(LearningRateDecay):
return self.create_lr_var(self.vars[len(self.values) - 1])
class NaturalExpDecay(LearningRateDecay):
    r"""
    :api_attr: imperative

    Natural exponential decay of an initial learning rate.

    The decayed value is computed as:

    .. math::

        decayed\_learning\_rate = learning\_rate * e^{y}

    With ``staircase=False``:

    .. math::

        y = - decay\_rate * \frac{global\_step}{decay\_steps}

    With ``staircase=True``:

    .. math::

        y = - decay\_rate * math.floor(\frac{global\_step}{decay\_steps})

    Parameters:
        learning_rate(Variable|float): The initial learning rate. If it is a
            Variable, it is a tensor with shape [1] whose data type is
            float32 or float64. A python int number is accepted as well.
        decay_steps(int): The decay step size. It determines the decay cycle.
        decay_rate(float): The decay rate (the usage example passes 0.5).
        staircase(bool, optional): If True, the learning rate decays at
            discrete intervals. The default value is False.
        begin(int, optional): The begin step, i.e. the initial value of
            global_step in the formulas above. The default value is 0.
        step(int, optional): The step size used to compute the new
            global_step. The default value is 1.
        dtype(str, optional): The data type used to create the learning rate
            variable, 'float32' or 'float64'. The default value is 'float32'.

    Returns:
        None.

    Examples:
        .. code-block:: python

            import paddle.fluid as fluid
            import paddle
            base_lr = 0.1
            with fluid.dygraph.guard():
                emb = paddle.nn.Embedding(10, 10)
                sgd_optimizer = fluid.optimizer.SGD(
                    learning_rate=fluid.dygraph.NaturalExpDecay(
                        learning_rate=base_lr,
                        decay_steps=10000,
                        decay_rate=0.5,
                        staircase=True),
                    parameter_list=emb.parameters())
    """

    def __init__(
        self,
        learning_rate,
        decay_steps,
        decay_rate,
        staircase=False,
        begin=0,
        step=1,
        dtype='float32',
    ):
        # begin / step / dtype are handled entirely by the base class.
        super().__init__(begin, step, dtype)
        self.staircase = staircase
        self.decay_rate = decay_rate
        self.decay_steps = decay_steps
        self.learning_rate = learning_rate

    def step(self):
        """Return the decayed learning rate for the current ``self.step_num``."""
        # step_num and create_lr_var are inherited from LearningRateDecay
        # (not visible here); presumably create_lr_var wraps the ratio in a
        # learning-rate variable -- confirm against the base class.
        progress = self.create_lr_var(self.step_num / self.decay_steps)
        exponent = paddle.floor(progress) if self.staircase else progress
        return self.learning_rate * paddle.exp(
            -1 * self.decay_rate * exponent
        )
class ExponentialDecay(LearningRateDecay):
    r"""
    :api_attr: imperative

    Exponential decay of a learning rate.

    The decayed value is computed as:

    .. math::

        decayed\_learning\_rate = learning\_rate * decay\_rate ^ y

    With ``staircase=False``:

    .. math::

        y = \frac{global\_step}{decay\_steps}

    With ``staircase=True``:

    .. math::

        y = math.floor(\frac{global\_step}{decay\_steps})

    Parameters:
        learning_rate(Variable|float): The initial learning rate. If it is a
            Variable, it is a tensor with shape [1] whose data type is
            float32 or float64. A python int number is accepted as well.
        decay_steps(int): The decay step size. It determines the decay cycle.
        decay_rate(float): The decay rate.
        staircase(bool, optional): If True, the learning rate decays at
            discrete intervals. The default value is False.
        begin(int, optional): The begin step, i.e. the initial value of
            global_step in the formulas above. The default value is 0.
        step(int, optional): The step size used to compute the new
            global_step. The default value is 1.
        dtype(str, optional): The data type used to create the learning rate
            variable, 'float32' or 'float64'. The default value is 'float32'.

    Returns:
        None.

    Examples:
        .. code-block:: python

            import paddle.fluid as fluid
            base_lr = 0.1
            with fluid.dygraph.guard():
                sgd_optimizer = fluid.optimizer.SGD(
                    learning_rate=fluid.dygraph.ExponentialDecay(
                        learning_rate=base_lr,
                        decay_steps=10000,
                        decay_rate=0.5,
                        staircase=True))
    """

    def __init__(
        self,
        learning_rate,
        decay_steps,
        decay_rate,
        staircase=False,
        begin=0,
        step=1,
        dtype='float32',
    ):
        # begin / step / dtype are handled entirely by the base class.
        super().__init__(begin, step, dtype)
        self.staircase = staircase
        self.decay_rate = decay_rate
        self.decay_steps = decay_steps
        self.learning_rate = learning_rate

    def step(self):
        """Return the decayed learning rate for the current ``self.step_num``."""
        # step_num and create_lr_var are inherited from LearningRateDecay
        # (not visible here); presumably create_lr_var wraps the ratio in a
        # learning-rate variable -- confirm against the base class.
        progress = self.create_lr_var(self.step_num / self.decay_steps)
        exponent = paddle.floor(progress) if self.staircase else progress
        return self.learning_rate * (self.decay_rate**exponent)
class InverseTimeDecay(LearningRateDecay):
    r"""
    :api_attr: imperative

    Inverse time decay of an initial learning rate.

    With ``staircase=False``:

    .. math::

        decayed\_learning\_rate = \frac{learning\_rate}{1 + decay\_rate * \frac{global\_step}{decay\_steps}}

    With ``staircase=True``:

    .. math::

        decayed\_learning\_rate = \frac{learning\_rate}{1 + decay\_rate * math.floor(\frac{global\_step}{decay\_steps})}

    Parameters:
        learning_rate(Variable|float): The initial learning rate. If it is a
            Variable, it is a tensor with shape [1] whose data type is
            float32 or float64. A python int number is accepted as well.
        decay_steps(int): The decay step size. It determines the decay cycle.
        decay_rate(float): The decay rate.
        staircase(bool, optional): If True, the learning rate decays at
            discrete intervals. The default value is False.
        begin(int, optional): The begin step, i.e. the initial value of
            global_step in the formulas above. The default value is 0.
        step(int, optional): The step size used to compute the new
            global_step. The default value is 1.
        dtype(str, optional): The data type used to create the learning rate
            variable, 'float32' or 'float64'. The default value is 'float32'.

    Returns:
        None.

    Examples:
        .. code-block:: python

            import paddle.fluid as fluid
            import paddle
            base_lr = 0.1
            with fluid.dygraph.guard():
                emb = paddle.nn.Embedding(10, 10)
                sgd_optimizer = fluid.optimizer.SGD(
                    learning_rate=fluid.dygraph.InverseTimeDecay(
                        learning_rate=base_lr,
                        decay_steps=10000,
                        decay_rate=0.5,
                        staircase=True),
                    parameter_list = emb.parameters())
    """

    def __init__(
        self,
        learning_rate,
        decay_steps,
        decay_rate,
        staircase=False,
        begin=0,
        step=1,
        dtype='float32',
    ):
        # begin / step / dtype are handled entirely by the base class.
        super().__init__(begin, step, dtype)
        self.staircase = staircase
        self.decay_rate = decay_rate
        self.decay_steps = decay_steps
        self.learning_rate = learning_rate

    def step(self):
        """Return the decayed learning rate for the current ``self.step_num``."""
        # step_num and create_lr_var are inherited from LearningRateDecay
        # (not visible here); presumably create_lr_var wraps the ratio in a
        # learning-rate variable -- confirm against the base class.
        progress = self.create_lr_var(self.step_num / self.decay_steps)
        elapsed = paddle.floor(progress) if self.staircase else progress
        return self.learning_rate / (1 + self.decay_rate * elapsed)
class PolynomialDecay(LearningRateDecay):
r"""
:api_attr: imperative
......
......@@ -166,8 +166,8 @@ def exponential_decay(learning_rate, decay_steps, decay_rate, staircase=False):
"""
with default_main_program()._lr_schedule_guard():
if in_dygraph_mode():
decay = imperate_lr.ExponentialDecay(
learning_rate, decay_steps, decay_rate, staircase
decay = paddle.optimizer.lr.ExponentialDecay(
learning_rate, decay_rate
)
return decay
else:
......@@ -228,8 +228,8 @@ def natural_exp_decay(learning_rate, decay_steps, decay_rate, staircase=False):
"""
with default_main_program()._lr_schedule_guard():
if in_dygraph_mode():
decay = imperate_lr.NaturalExpDecay(
learning_rate, decay_steps, decay_rate, staircase
decay = paddle.optimizer.lr.NaturalExpDecay(
learning_rate, decay_rate
)
return decay
else:
......@@ -288,8 +288,8 @@ def inverse_time_decay(learning_rate, decay_steps, decay_rate, staircase=False):
"""
with default_main_program()._lr_schedule_guard():
if in_dygraph_mode():
decay = imperate_lr.InverseTimeDecay(
learning_rate, decay_steps, decay_rate, staircase
decay = paddle.optimizer.lr.InverseTimeDecay(
learning_rate, decay_rate
)
return decay
else:
......
......@@ -342,8 +342,8 @@ def dyfunc_CosineDecay():
def dyfunc_ExponentialDecay():
base_lr = 0.1
exponential_decay = fluid.dygraph.ExponentialDecay(
learning_rate=base_lr, decay_steps=10000, decay_rate=0.5, staircase=True
exponential_decay = paddle.optimizer.lr.ExponentialDecay(
learning_rate=base_lr, gamma=0.5
)
lr = exponential_decay()
return lr
......@@ -351,8 +351,8 @@ def dyfunc_ExponentialDecay():
def dyfunc_InverseTimeDecay():
base_lr = 0.1
inverse_time_decay = fluid.dygraph.InverseTimeDecay(
learning_rate=base_lr, decay_steps=10000, decay_rate=0.5, staircase=True
inverse_time_decay = paddle.optimizer.lr.InverseTimeDecay(
learning_rate=base_lr, gamma=0.5
)
lr = inverse_time_decay()
return lr
......@@ -360,8 +360,8 @@ def dyfunc_InverseTimeDecay():
def dyfunc_NaturalExpDecay():
base_lr = 0.1
natural_exp_decay = fluid.dygraph.NaturalExpDecay(
learning_rate=base_lr, decay_steps=10000, decay_rate=0.5, staircase=True
natural_exp_decay = paddle.optimizer.lr.NaturalExpDecay(
learning_rate=base_lr, gamma=0.5
)
lr = natural_exp_decay()
return lr
......@@ -424,16 +424,79 @@ class TestDygraphBasicApi_ExponentialDecay(TestDygraphBasicApi_CosineDecay):
def setUp(self):
    # Function under test; every other test hook in this class calls it.
    self.dygraph_func = dyfunc_ExponentialDecay

def get_dygraph_output(self):
    """Run ``self.dygraph_func`` under the dygraph guard and return its result."""
    with fluid.dygraph.guard():
        # default_startup_program / default_main_program are functions:
        # call them so the seed is set on the Program they return rather
        # than stored as a stray attribute on the function object.
        fluid.default_startup_program().random_seed = SEED
        fluid.default_main_program().random_seed = SEED
        res = self.dygraph_func()
        return res

def get_static_output(self):
    """Convert ``self.dygraph_func`` to static mode, execute it on CPU and
    return the first fetched result."""
    startup_program = fluid.Program()
    startup_program.random_seed = SEED
    main_program = fluid.Program()
    main_program.random_seed = SEED
    with fluid.program_guard(main_program, startup_program):
        static_out = dygraph_to_static_func(self.dygraph_func)()
        # The converted function's result is wrapped in a tensor so it can
        # be passed as the fetch target below.
        static_out = paddle.to_tensor(static_out)

    exe = fluid.Executor(fluid.CPUPlace())
    exe.run(startup_program)
    static_res = exe.run(main_program, fetch_list=static_out)
    return static_res[0]
class TestDygraphBasicApi_InverseTimeDecay(TestDygraphBasicApi_CosineDecay):
    """Runs ``dyfunc_InverseTimeDecay`` in dygraph mode and through the
    dygraph-to-static conversion; the comparison itself is presumably done by
    the parent test class (not visible here)."""

    def setUp(self):
        # Function under test; every other test hook in this class calls it.
        self.dygraph_func = dyfunc_InverseTimeDecay

    def get_dygraph_output(self):
        """Run ``self.dygraph_func`` under the dygraph guard and return its result."""
        with fluid.dygraph.guard():
            # default_startup_program / default_main_program are functions:
            # call them so the seed is set on the Program they return rather
            # than stored as a stray attribute on the function object.
            fluid.default_startup_program().random_seed = SEED
            fluid.default_main_program().random_seed = SEED
            res = self.dygraph_func()
            return res

    def get_static_output(self):
        """Convert ``self.dygraph_func`` to static mode, execute it on CPU and
        return the first fetched result."""
        startup_program = fluid.Program()
        startup_program.random_seed = SEED
        main_program = fluid.Program()
        main_program.random_seed = SEED
        with fluid.program_guard(main_program, startup_program):
            static_out = dygraph_to_static_func(self.dygraph_func)()
            # The converted function's result is wrapped in a tensor so it can
            # be passed as the fetch target below.
            static_out = paddle.to_tensor(static_out)

        exe = fluid.Executor(fluid.CPUPlace())
        exe.run(startup_program)
        static_res = exe.run(main_program, fetch_list=static_out)
        return static_res[0]
class TestDygraphBasicApi_NaturalExpDecay(TestDygraphBasicApi_CosineDecay):
    """Runs ``dyfunc_NaturalExpDecay`` in dygraph mode and through the
    dygraph-to-static conversion; the comparison itself is presumably done by
    the parent test class (not visible here)."""

    def setUp(self):
        # Function under test; every other test hook in this class calls it.
        self.dygraph_func = dyfunc_NaturalExpDecay

    def get_dygraph_output(self):
        """Run ``self.dygraph_func`` under the dygraph guard and return its result."""
        with fluid.dygraph.guard():
            # default_startup_program / default_main_program are functions:
            # call them so the seed is set on the Program they return rather
            # than stored as a stray attribute on the function object.
            fluid.default_startup_program().random_seed = SEED
            fluid.default_main_program().random_seed = SEED
            res = self.dygraph_func()
            return res

    def get_static_output(self):
        """Convert ``self.dygraph_func`` to static mode, execute it on CPU and
        return the first fetched result."""
        startup_program = fluid.Program()
        startup_program.random_seed = SEED
        main_program = fluid.Program()
        main_program.random_seed = SEED
        with fluid.program_guard(main_program, startup_program):
            static_out = dygraph_to_static_func(self.dygraph_func)()
            # The converted function's result is wrapped in a tensor so it can
            # be passed as the fetch target below.
            static_out = paddle.to_tensor(static_out)

        exe = fluid.Executor(fluid.CPUPlace())
        exe.run(startup_program)
        static_res = exe.run(main_program, fetch_list=static_out)
        return static_res[0]
class TestDygraphBasicApi_NoamDecay(TestDygraphBasicApi_CosineDecay):
def setUp(self):
......
......@@ -117,11 +117,9 @@ class TestDistCTR2x2(TestDistRunnerBase):
use_lr_decay = bool(os.getenv('LR_DECAY', 0))
lr = 0.0001
if use_lr_decay:
lr = fluid.layers.exponential_decay(
lr = paddle.optimizer.lr.ExponentialDecay(
learning_rate=0.0001,
decay_steps=10000,
decay_rate=0.999,
staircase=True,
gamma=0.999,
)
sgd_optimizer = fluid.optimizer.SGD(
......
......@@ -195,11 +195,9 @@ class TestPSPassWithBow(unittest.TestCase):
loss, acc, _ = self.net()
optimizer = fluid.optimizer.Adam(
learning_rate=fluid.layers.exponential_decay(
learning_rate=paddle.optimizer.lr.ExponentialDecay(
learning_rate=base_lr,
decay_steps=500,
decay_rate=0.969,
staircase=True,
gamma=0.969,
)
)
......
......@@ -307,11 +307,9 @@ class TestLRDecay(TranspilerTest):
cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
avg_cost = paddle.mean(cost)
sgd_optimizer = fluid.optimizer.SGD(
learning_rate=fluid.layers.exponential_decay(
learning_rate=paddle.optimizer.lr.ExponentialDecay(
learning_rate=1.0,
decay_steps=2100,
decay_rate=0.1,
staircase=True,
gamma=0.1,
)
)
sgd_optimizer.minimize(avg_cost)
......@@ -444,11 +442,9 @@ class TestFakeInit(TranspilerTest):
avg_cost = paddle.mean(cost)
sgd_optimizer = fluid.optimizer.SGD(
learning_rate=fluid.layers.exponential_decay(
learning_rate=paddle.optimizer.lr.ExponentialDecay(
learning_rate=1.0,
decay_steps=2100,
decay_rate=0.1,
staircase=True,
gamma=0.1,
)
)
sgd_optimizer.minimize(avg_cost)
......
......@@ -499,25 +499,26 @@ class TestOptimizerLearningRate(unittest.TestCase):
loss = paddle.mean(b)
base_lr = 1.0
adam = fluid.optimizer.Adam(
fluid.dygraph.NaturalExpDecay(
learning_rate=base_lr,
decay_steps=3,
decay_rate=0.5,
staircase=True,
),
parameter_list=linear.parameters(),
scheduler = paddle.optimizer.lr.NaturalExpDecay(
learning_rate=base_lr,
gamma=0.5,
)
np.testing.assert_allclose(
adam.current_step_lr(), 1.0, rtol=1e-06, atol=0.0
adam = paddle.optimizer.Adam(
learning_rate=scheduler,
parameters=linear.parameters(),
)
np.testing.assert_allclose(adam.get_lr(), 1.0, rtol=1e-06, atol=0.0)
ret = [1.0, 1.0, 1.0, np.exp(-0.5), np.exp(-0.5)]
counter = 0
for i in range(5):
adam.minimize(loss)
lr = adam.current_step_lr()
lr = adam.get_lr()
counter += 1
if counter % 3 == 0:
adam.step()
scheduler.step()
np.testing.assert_allclose(lr, ret[i], rtol=1e-06, atol=0.0)
def test_set_lr(self):
......@@ -550,14 +551,12 @@ class TestOptimizerLearningRate(unittest.TestCase):
np.testing.assert_allclose(lr, 0.7, rtol=1e-06, atol=0.0)
with self.assertRaises(RuntimeError):
adam = fluid.optimizer.Adam(
fluid.dygraph.NaturalExpDecay(
adam = paddle.optimizer.Adam(
paddle.optimizer.lr.NaturalExpDecay(
learning_rate=0.1,
decay_steps=3,
decay_rate=0.5,
staircase=True,
gamma=0.5,
),
parameter_list=linear.parameters(),
parameters=linear.parameters(),
)
adam.set_lr(0.01)
......
......@@ -123,11 +123,9 @@ class TestLearningRateDecayDygraph(unittest.TestCase):
linear = paddle.nn.Linear(10, 10)
input = fluid.dygraph.to_variable(x)
Exponential_scheduler = fluid.dygraph.ExponentialDecay(
Exponential_scheduler = paddle.optimizer.lr.ExponentialDecay(
learning_rate=0.1,
decay_steps=10000,
decay_rate=0.5,
staircase=True,
gamma=0.5,
)
Step_scheduler = fluid.dygraph.StepDecay(0.5, step_size=3)
Reducelr_scheduler = fluid.dygraph.ReduceLROnPlateau(
......@@ -161,11 +159,9 @@ class TestLearningRateDecayDygraph(unittest.TestCase):
paddle.save(linear.state_dict(), "save_path.pdparams")
Exponential_scheduler_test = fluid.dygraph.ExponentialDecay(
Exponential_scheduler_test = paddle.optimizer.lr.ExponentialDecay(
learning_rate=0.1,
decay_steps=10000,
decay_rate=0.5,
staircase=True,
gamma=0.5,
)
Step_scheduler_test = fluid.dygraph.StepDecay(0.5, step_size=3)
Reducelr_scheduler_test = fluid.dygraph.ReduceLROnPlateau(
......@@ -180,9 +176,9 @@ class TestLearningRateDecayDygraph(unittest.TestCase):
)
adam_test.set_dict(opt_state)
self.assertEqual(
adam_test._learning_rate.step_num,
adam1._learning_rate.step_num,
"epoch_num is different before and after set_dict",
adam_test._learning_rate.last_epoch,
adam1._learning_rate.last_epoch,
"last_epoch is different before and after set_dict",
)
paddle.save(adam2.state_dict(), "save_path.pdopt")
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册