Commit d04f3b9a authored by mindspore-ci-bot, committed by Gitee

!2748 Change order param only equal to group param

Merge pull request !2748 from ghzl/change-order-params-only-equal-to-group-param
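In effect, the value of 'order_params' may now only reorder the parameters that already appear in the groups; parameters listed there but missing from every group no longer fall back to the default learning rate and weight decay, and a ValueError is raised instead. Below is a minimal sketch of the new contract; it assumes the LeNet5 network used by the existing tests is importable and is not part of the commit itself.

# Minimal sketch of the constraint this commit introduces (illustrative only).
from mindspore import nn

net = LeNet5()  # assumed model with both 'conv' and non-'conv' parameters
conv_params = list(filter(lambda x: 'conv' in x.name, net.trainable_params()))
no_conv_params = list(filter(lambda x: 'conv' not in x.name, net.trainable_params()))

# Accepted: every trainable parameter belongs to a group, and 'order_params'
# lists exactly those parameters, so it only controls their order.
group_params = [{'params': conv_params, 'weight_decay': 0.01},
                {'params': no_conv_params, 'lr': 0.01},
                {'order_params': net.trainable_params()}]
opt = nn.Momentum(group_params, learning_rate=0.1, momentum=0.9, weight_decay=0.0)

# Rejected after this change: 'order_params' names parameters that are in no
# group (only conv_params is grouped). Previously they silently used the default
# learning rate and weight decay; now the optimizer raises a ValueError.
bad_group_params = [{'params': conv_params, 'weight_decay': 0.01},
                    {'order_params': net.trainable_params()}]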
......@@ -181,8 +181,7 @@ class Adam(Optimizer):
- order_params: Optional. If "order_params" in the keys, the value should be the order of parameters and
the order will be followed in optimizer. There are no other keys in the `dict` and the parameters which
- in the value of 'order_params' but not in any group will use default learning rate and default weight
- decay.
+ in the value of 'order_params' should be in one of group parameters.
learning_rate (Union[int, float, Tensor, Iterable]): A value for the learning rate. When the learning_rate is
Iterable or a Tensor and the dims of the Tensor is 1,
......@@ -220,16 +219,14 @@ class Adam(Optimizer):
>>>
>>> #2) Use parameter groups and set different values
>>> conv_params = list(filter(lambda x: 'conv' in x.name, net.trainable_params()))
- >>> bias_params = list(filter(lambda x: 'bias' in x.name, net.trainable_params()))
+ >>> no_conv_params = list(filter(lambda x: 'conv' not in x.name, net.trainable_params()))
>>> group_params = [{'params': conv_params, 'weight_decay': 0.01},
- >>> {'params': bias_params, 'lr': 0.01},
+ >>> {'params': no_conv_params, 'lr': 0.01},
>>> {'order_params': net.trainable_params()}]
>>> opt = nn.Adam(group_params, learning_rate=0.1, weight_decay=0.0)
>>> # The conv_params's parameters will use a learning rate of default value 0.1 and a weight decay of 0.01.
- >>> # The bias_params's parameters will use a learning rate of 0.01 and a weight decay of default value 0.0.
+ >>> # The no_conv_params's parameters will use a learning rate of 0.01 and a weight decay of default value 0.0.
>>> # The final parameters order in which the optimizer will be followed is the value of 'order_params'.
- >>> # The parameters which in the value of 'order_params' but not in any group will use a learning rate
- >>> # of default value 0.1 and a weight decay of default value 0.0.
>>>
>>> loss = nn.SoftmaxCrossEntropyWithLogits()
>>> model = Model(net, loss_fn=loss, optimizer=optim)
......
......@@ -109,6 +109,10 @@ class LazyAdam(Optimizer):
- weight_decay: Optional. If "weight_decay" in the keys, the value of corresponding weight decay
will be used. If not, the `weight_decay` in the API will be used.
+ - order_params: Optional. If "order_params" in the keys, the value should be the order of parameters and
+ the order will be followed in optimizer. There are no other keys in the `dict` and the parameters which
+ in the value of 'order_params' should be in one of group parameters.
learning_rate (Union[float, Tensor, Iterable]): A value for the learning rate. When the learning_rate is
Iterable or a Tensor and the dims of the Tensor is 1,
use dynamic learning rate, then the i-th step will
......@@ -146,12 +150,13 @@ class LazyAdam(Optimizer):
>>> #2) Use parameter groups and set different values
>>> conv_params = list(filter(lambda x: 'conv' in x.name, net.trainable_params()))
>>> no_conv_params = list(filter(lambda x: 'conv' not in x.name, net.trainable_params()))
- >>> group_params = [{'params': conv_params, 'weight_decay': 0.01, 'lr': 0.01},
- >>> {'params': no_conv_params}]
+ >>> group_params = [{'params': conv_params, 'weight_decay': 0.01},
+ >>> {'params': no_conv_params, 'lr': 0.01},
+ >>> {'order_params': net.trainable_params()}]
>>> opt = nn.LazyAdam(group_params, learning_rate=0.1, weight_decay=0.0)
- >>> # the conv_params's parameters will use a learning rate of 0.01 and a weight decay of 0.01
- >>> # the no_cov_params's parameters don't set learning and weight decay. So they will use a
- >>> # learning rate of 0.1 and a weight decay of 0.0.
+ >>> # The conv_params's parameters will use a learning rate of default value 0.1 and a weight decay of 0.01.
+ >>> # The no_conv_params's parameters will use a learning rate of 0.01 and a weight decay of default value 0.0.
+ >>> # The final parameters order in which the optimizer will be followed is the value of 'order_params'.
>>>
>>> loss = nn.SoftmaxCrossEntropyWithLogits()
>>> model = Model(net, loss_fn=loss, optimizer=optim)
......
......@@ -64,8 +64,7 @@ class Momentum(Optimizer):
- order_params: Optional. If "order_params" in the keys, the value should be the order of parameters and
the order will be followed in optimizer. There are no other keys in the `dict` and the parameters which
- in the value of 'order_params' but not in any group will use default learning rate and default weight
- decay.
+ in the value of 'order_params' should be in one of group parameters.
learning_rate (Union[int, float, Tensor, Iterable]): A value for the learning rate. When the learning_rate is
Iterable or a Tensor and the dims of the Tensor is 1,
......@@ -97,16 +96,14 @@ class Momentum(Optimizer):
>>>
>>> #2) Use parameter groups and set different values
>>> conv_params = list(filter(lambda x: 'conv' in x.name, net.trainable_params()))
- >>> bias_params = list(filter(lambda x: 'bias' in x.name, net.trainable_params()))
+ >>> no_conv_params = list(filter(lambda x: 'conv' not in x.name, net.trainable_params()))
>>> group_params = [{'params': conv_params, 'weight_decay': 0.01},
- >>> {'params': bias_params, 'lr': 0.01},
+ >>> {'params': no_conv_params, 'lr': 0.01},
>>> {'order_params': net.trainable_params()}]
>>> opt = nn.Momentum(group_params, learning_rate=0.1, momentum=0.9, weight_decay=0.0)
>>> # The conv_params's parameters will use a learning rate of default value 0.1 and a weight decay of 0.01.
- >>> # The bias_params's parameters will use a learning rate of 0.01 and a weight decay of default value 0.0.
+ >>> # The no_conv_params's parameters will use a learning rate of 0.01 and a weight decay of default value 0.0.
>>> # The final parameters order in which the optimizer will be followed is the value of 'order_params'.
- >>> # The parameters which in the value of 'order_params' but not in any group will use a learning rate
- >>> # of default value 0.1 and a weight decay of default value 0.0.
>>>
>>> loss = nn.SoftmaxCrossEntropyWithLogits()
>>> model = Model(net, loss_fn=loss, optimizer=optim, metrics=None)
......
......@@ -77,8 +77,7 @@ class Optimizer(Cell):
- order_params: Optional. If "order_params" in the keys, the value should be the order of parameters and
the order will be followed in optimizer. There are no other keys in the `dict` and the parameters which
- in the value of 'order_params' but not in any group will use default learning rate and default weight
- decay.
+ in the value of 'order_params' should be in one of group parameters.
weight_decay (float): A floating point value for the weight decay. It should be equal to or greater than 0.
If the type of `weight_decay` input is int, it will be converted to float. Default: 0.0.
......@@ -351,16 +350,18 @@ class Optimizer(Cell):
self.group_weight_decay.append(weight_decay_)
if self.is_group_params_ordered:
- self._order_and_adjust_group_params(ordered_parameters, learning_rate, weight_decay)
+ self._order_and_adjust_group_params(ordered_parameters)
- def _order_and_adjust_group_params(self, ordered_parameters, learning_rate, weight_decay):
+ def _order_and_adjust_group_params(self, ordered_parameters):
"""
- Order group parameter, learning rate and weight decay in group params. And assign the parameters
- which in the value of 'order_params' but not in any group to default value.
+ Order group parameter, learning rate and weight decay in group params.
"""
- params_length = len(ordered_parameters)
- ordered_learning_rate = [Parameter(learning_rate, name="lr_" + param.name) for param in ordered_parameters]
- ordered_weight_decay = [weight_decay * self.loss_scale] * params_length
+ params_length = len(self.group_params)
+ if len(ordered_parameters) != len(self.group_params):
+ raise ValueError(f"The value of 'order_params' should be same with all group parameters.")
+ ordered_learning_rate = [None] * params_length
+ ordered_weight_decay = [None] * params_length
params_name = [param.name for param in ordered_parameters]
for param, lr, wd in zip(self.group_params, self.group_lr, self.group_weight_decay):
......
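The reworked helper above boils down to the following plain-Python sketch. It is a simplified illustration, not the actual MindSpore implementation: Param stands in for mindspore's Parameter, and the function only mirrors the idea that each grouped parameter is re-slotted into the position its name holds in 'order_params', with a length mismatch now raising ValueError instead of filling the missing entries with defaults.

from collections import namedtuple

Param = namedtuple("Param", ["name"])  # stand-in for a framework Parameter

def order_group_settings(ordered_names, params, lrs, weight_decays):
    """Sketch: rebuild per-parameter lr/weight-decay lists in 'order_params' order."""
    if len(ordered_names) != len(params):
        raise ValueError("The value of 'order_params' should be same with all group parameters.")
    ordered_params = [None] * len(ordered_names)
    ordered_lr = [None] * len(ordered_names)
    ordered_wd = [None] * len(ordered_names)
    for param, lr, wd in zip(params, lrs, weight_decays):
        index = ordered_names.index(param.name)  # position dictated by 'order_params'
        ordered_params[index] = param
        ordered_lr[index] = lr
        ordered_wd[index] = wd
    return ordered_params, ordered_lr, ordered_wd

# Example: two grouped parameters re-slotted to match the requested order.
conv_w, fc_w = Param("conv.weight"), Param("fc.weight")
print(order_group_settings(["fc.weight", "conv.weight"], [conv_w, fc_w], [0.01, 0.1], [0.01, 0.0]))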
......@@ -107,8 +107,7 @@ class RMSProp(Optimizer):
- order_params: Optional. If "order_params" in the keys, the value should be the order of parameters and
the order will be followed in optimizer. There are no other keys in the `dict` and the parameters which
- in the value of 'order_params' but not in any group will use default learning rate and default weight
- decay.
+ in the value of 'order_params' should be in one of group parameters.
learning_rate (Union[float, Tensor, Iterable]): A value for the learning rate. When the learning_rate is
Iterable or a Tensor and the dims of the Tensor is 1,
......@@ -140,16 +139,14 @@ class RMSProp(Optimizer):
>>>
>>> #2) Use parameter groups and set different values
>>> conv_params = list(filter(lambda x: 'conv' in x.name, net.trainable_params()))
- >>> bias_params = list(filter(lambda x: 'bias' in x.name, net.trainable_params()))
+ >>> no_conv_params = list(filter(lambda x: 'conv' not in x.name, net.trainable_params()))
>>> group_params = [{'params': conv_params, 'weight_decay': 0.01},
- >>> {'params': bias_params, 'lr': 0.01},
+ >>> {'params': no_conv_params, 'lr': 0.01},
>>> {'order_params': net.trainable_params()}]
>>> opt = nn.RMSProp(group_params, learning_rate=0.1, weight_decay=0.0)
>>> # The conv_params's parameters will use a learning rate of default value 0.1 and a weight decay of 0.01.
- >>> # The bias_params's parameters will use a learning rate of 0.01 and a weight decay of default value 0.0.
+ >>> # The no_conv_params's parameters will use a learning rate of 0.01 and a weight decay of default value 0.0.
>>> # The final parameters order in which the optimizer will be followed is the value of 'order_params'.
- >>> # The parameters which in the value of 'order_params' but not in any group will use a learning rate
- >>> # of default value 0.1 and a weight decay of default value 0.0.
>>>
>>> loss = nn.SoftmaxCrossEntropyWithLogits()
>>> model = Model(net, loss_fn=loss, optimizer=optim)
......
......@@ -64,8 +64,7 @@ class SGD(Optimizer):
- order_params: Optional. If "order_params" in the keys, the value should be the order of parameters and
the order will be followed in optimizer. There are no other keys in the `dict` and the parameters which
- in the value of 'order_params' but not in any group will use default learning rate and default weight
- decay.
+ in the value of 'order_params' should be in one of group parameters.
learning_rate (Union[float, Tensor, Iterable]): A value for the learning rate. When the learning_rate is
Iterable or a Tensor and the dims of the Tensor is 1,
......@@ -98,16 +97,14 @@ class SGD(Optimizer):
>>>
>>> #2) Use parameter groups and set different values
>>> conv_params = list(filter(lambda x: 'conv' in x.name, net.trainable_params()))
- >>> bias_params = list(filter(lambda x: 'bias' in x.name, net.trainable_params()))
+ >>> no_conv_params = list(filter(lambda x: 'conv' not in x.name, net.trainable_params()))
>>> group_params = [{'params': conv_params, 'weight_decay': 0.01},
- >>> {'params': bias_params, 'lr': 0.01},
+ >>> {'params': no_conv_params, 'lr': 0.01},
>>> {'order_params': net.trainable_params()}]
>>> opt = nn.SGD(group_params, learning_rate=0.1, weight_decay=0.0)
>>> # The conv_params's parameters will use a learning rate of default value 0.1 and a weight decay of 0.01.
- >>> # The bias_params's parameters will use a learning rate of 0.01 and a weight decay of default value 0.0.
+ >>> # The no_conv_params's parameters will use a learning rate of 0.01 and a weight decay of default value 0.0.
>>> # The final parameters order in which the optimizer will be followed is the value of 'order_params'.
- >>> # The parameters which in the value of 'order_params' but not in any group will use a learning rate
- >>> # of default value 0.1 and a weight decay of default value 0.0.
>>>
>>> loss = nn.SoftmaxCrossEntropyWithLogits()
>>> model = Model(net, loss_fn=loss, optimizer=optim)
......
......@@ -250,8 +250,9 @@ def test_get_lr_parameter_with_order_group():
net = LeNet5()
conv_lr = 0.1
conv_params = list(filter(lambda x: 'conv' in x.name, net.trainable_params()))
+ no_conv_params = list(filter(lambda x: 'conv' not in x.name, net.trainable_params()))
group_params = [{'params': conv_params, 'lr': conv_lr},
- {'order_params': net.trainable_params()}]
+ {'params': no_conv_params}]
opt = SGD(group_params)
assert opt.is_group_lr is True
for param in opt.parameters:
......@@ -278,65 +279,19 @@ def test_get_lr_parameter_with_no_group():
opt.get_lr_parameter(params_error)
- def test_order_params_lr():
- net = LeNet5()
- conv_lr = 0.01
- default_lr = 0.1
- conv_params = list(filter(lambda x: 'conv' in x.name, net.trainable_params()))
- group_params = [{'params': conv_params, 'lr': conv_lr},
- {'order_params': net.trainable_params()}]
- opt = SGD(group_params, learning_rate=default_lr)
- assert opt.is_group is True
- assert opt.is_group_lr is True
- assert opt.is_group_params_ordered is True
- for lr, param, order_param in zip(opt.learning_rate, opt.parameters, net.trainable_params()):
- if param in conv_params:
- assert np.all(lr.data.asnumpy() == Tensor(conv_lr, mstype.float32).asnumpy())
- else:
- assert np.all(lr.data.asnumpy() == Tensor(default_lr, mstype.float32).asnumpy())
- assert param.name == order_param.name
- assert lr.name == 'lr_' + param.name
- def test_order_params_weight_decay():
- net = LeNet5()
- conv_weight_decay = 0.01
- default_wd = 0.0
- default_lr = 0.1
- conv_params = list(filter(lambda x: 'conv' in x.name, net.trainable_params()))
- group_params = [{'params': conv_params, 'weight_decay': conv_weight_decay},
- {'order_params': net.trainable_params()}]
- opt = SGD(group_params, learning_rate=default_lr, weight_decay=default_wd)
- assert opt.is_group is True
- assert opt.is_group_lr is False
- assert opt.is_group_params_ordered is True
- assert opt.learning_rate.name == "learning_rate"
- assert np.all(opt.learning_rate.data.asnumpy() == Tensor(default_lr, mstype.float32).asnumpy())
- for weight_decay, decay_flags, param, order_param in zip(
- opt.weight_decay, opt.decay_flags, opt.parameters, net.trainable_params()):
- if param in conv_params:
- assert weight_decay == conv_weight_decay
- assert decay_flags is True
- else:
- assert weight_decay == default_wd
- assert decay_flags is False
- assert param.name == order_param.name
- def test_order_params_all_1():
+ def test_order_params_1():
net = LeNet5()
conv_params = list(filter(lambda x: 'conv' in x.name, net.trainable_params()))
bias_params = list(filter(lambda x: 'bias' in x.name, net.trainable_params()))
group_params = [{'params': conv_params, 'weight_decay': 0.01},
{'params': bias_params, 'lr': 0.01},
- {'order_params': net.trainable_params()}]
+ {'order_params': bias_params+conv_params}]
opt = SGD(group_params, learning_rate=0.1, weight_decay=0.0)
assert opt.is_group is True
assert opt.is_group_lr is True
assert opt.is_group_params_ordered is True
for weight_decay, decay_flags, lr, param, order_param in zip(
- opt.weight_decay, opt.decay_flags, opt.learning_rate, opt.parameters, net.trainable_params()):
+ opt.weight_decay, opt.decay_flags, opt.learning_rate, opt.parameters, bias_params+conv_params):
if param in conv_params:
assert np.all(lr.data.asnumpy() == Tensor(0.1, mstype.float32).asnumpy())
assert weight_decay == 0.01
......@@ -354,7 +309,7 @@ def test_order_params_all_1():
assert lr.name == 'lr_' + param.name
- def test_order_params_all_2():
+ def test_order_params_2():
net = LeNet5()
conv_weight_decay = 0.01
fc1_lr = (0.5, 0.4, 0.3)
......@@ -364,13 +319,13 @@ def test_order_params_all_2():
fc1_params = list(filter(lambda x: 'fc1' in x.name, net.trainable_params()))
group_params = [{'params': fc1_params, 'lr': fc1_lr},
{'params': conv_params, 'weight_decay': conv_weight_decay},
- {'order_params': net.trainable_params()}]
+ {'order_params': fc1_params+conv_params}]
opt = SGD(group_params, learning_rate=default_lr, weight_decay=default_wd)
assert opt.is_group is True
assert opt.is_group_lr is True
assert opt.is_group_params_ordered is True
for weight_decay, decay_flags, lr, param, order_param in zip(
- opt.weight_decay, opt.decay_flags, opt.learning_rate, opt.parameters, net.trainable_params()):
+ opt.weight_decay, opt.decay_flags, opt.learning_rate, opt.parameters, fc1_params+conv_params):
if param in conv_params:
assert np.all(lr.data.asnumpy() == Tensor(np.array([default_lr] * 3), mstype.float32).asnumpy())
assert weight_decay == conv_weight_decay
......@@ -388,7 +343,7 @@ def test_order_params_all_2():
assert lr.name == 'lr_' + param.name
- def test_get_order_params_with_not_include():
+ def test_get_order_params_with_not_same():
net = LeNet5()
conv_weight_decay = 0.8
......
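The renamed test presumably exercises the mismatch case, but its body is not shown in this diff, so the following is only a hypothetical illustration of the error path, assuming the LeNet5 model and SGD optimizer used by the surrounding tests.

# Hypothetical illustration only; the real body of
# test_get_order_params_with_not_same is not shown in this diff.
import pytest

def check_order_params_must_match_groups():
    net = LeNet5()  # assumed, as in the surrounding tests
    conv_params = list(filter(lambda x: 'conv' in x.name, net.trainable_params()))
    group_params = [{'params': conv_params, 'weight_decay': 0.8},
                    {'order_params': net.trainable_params()}]  # covers more parameters than the groups do
    with pytest.raises(ValueError):
        SGD(group_params)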