Unverified commit b47dd158, authored by Chen Weihang, committed by GitHub

[HotFix] Add support for optimizer with varbase input (#32362)

* add support for optimizer with varbase input

* refine cond

* fix failed unittest

* add test for coverage
Parent c3158527
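
For context, a minimal sketch of the usage this commit enables, mirroring the unit test added below: a plain dygraph Tensor (VarBase) can now be passed directly in the `parameters` list of a `paddle.optimizer` optimizer.

import numpy as np
import paddle

# A leaf tensor used directly as a trainable parameter (a VarBase in dygraph mode).
x = paddle.zeros([2, 3])
x.stop_gradient = False
y = paddle.ones([2, 3])

z = x + y
opt = paddle.optimizer.SGD(learning_rate=0.01, parameters=[x])
z.backward()
opt.step()

# dz/dx is all ones, so one SGD step moves x to -learning_rate everywhere.
assert np.allclose(x.numpy(), np.full([2, 3], -0.01))
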
......@@ -28,10 +28,12 @@ def _create_regularization_of_grad(param, grad, regularization=None):
Function helper of append_regularization_ops.
"""
# If no gradient or no regularization is specified, then we don't need to do anything
if grad is None or (param.regularizer is None and regularization is None):
if grad is None or ((not hasattr(param, 'regularizer') or (
hasattr(param, 'regularizer') and param.regularizer is None)) and
regularization is None):
return grad
regularization_term = None
if param.regularizer is not None:
if hasattr(param, 'regularizer') and param.regularizer is not None:
# Add variable for regularization term in grad block
regularization_term = param.regularizer(param, grad, grad.block)
elif regularization is not None:
......@@ -213,7 +215,7 @@ class L2DecayRegularizer(WeightDecayRegularizer):
Returns:
new variable for weight decay
"""
assert isinstance(param, framework.Parameter)
assert isinstance(param, framework.Variable)
assert isinstance(block, framework.Block)
inputs = {"X": [param]}
......@@ -320,7 +322,7 @@ class L1DecayRegularizer(WeightDecayRegularizer):
Returns:
new variable for weight decay
"""
assert isinstance(param, framework.Parameter)
assert isinstance(param, framework.Variable)
assert isinstance(block, framework.Block)
if framework.in_dygraph_mode():
......
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import numpy as np
import unittest
import paddle
import paddle.optimizer as optimizer


class TestOptimizerForVarBase(unittest.TestCase):
    def setUp(self):
        self.lr = 0.01

    def run_optimizer_step_with_varbase_list_input(self, optimizer):
        x = paddle.zeros([2, 3])
        y = paddle.ones([2, 3])
        x.stop_gradient = False

        z = x + y

        opt = optimizer(
            learning_rate=self.lr, parameters=[x], weight_decay=0.01)

        z.backward()
        opt.step()

        self.assertTrue(np.allclose(x.numpy(), np.full([2, 3], -self.lr)))

    def run_optimizer_minimize_with_varbase_list_input(self, optimizer):
        x = paddle.zeros([2, 3])
        y = paddle.ones([2, 3])
        x.stop_gradient = False

        z = x + y

        opt = optimizer(learning_rate=self.lr, parameters=[x])

        z.backward()
        opt.minimize(z)

        self.assertTrue(np.allclose(x.numpy(), np.full([2, 3], -self.lr)))

    def test_adam_with_varbase_list_input(self):
        self.run_optimizer_step_with_varbase_list_input(optimizer.Adam)
        self.run_optimizer_minimize_with_varbase_list_input(optimizer.Adam)

    def test_sgd_with_varbase_list_input(self):
        self.run_optimizer_step_with_varbase_list_input(optimizer.SGD)
        self.run_optimizer_minimize_with_varbase_list_input(optimizer.SGD)

    def test_adagrad_with_varbase_list_input(self):
        self.run_optimizer_step_with_varbase_list_input(optimizer.Adagrad)
        self.run_optimizer_minimize_with_varbase_list_input(optimizer.Adagrad)

    def test_adamw_with_varbase_list_input(self):
        self.run_optimizer_step_with_varbase_list_input(optimizer.AdamW)
        self.run_optimizer_minimize_with_varbase_list_input(optimizer.AdamW)

    def test_adamax_with_varbase_list_input(self):
        self.run_optimizer_step_with_varbase_list_input(optimizer.Adamax)
        self.run_optimizer_minimize_with_varbase_list_input(optimizer.Adamax)

    def test_momentum_with_varbase_list_input(self):
        self.run_optimizer_step_with_varbase_list_input(optimizer.Momentum)
        self.run_optimizer_minimize_with_varbase_list_input(optimizer.Momentum)

    def test_optimizer_with_varbase_input(self):
        x = paddle.zeros([2, 3])
        with self.assertRaises(TypeError):
            optimizer.Adam(learning_rate=self.lr, parameters=x)

    def test_create_param_lr_with_1_for_coverage(self):
        x = paddle.fluid.framework.ParamBase(
            dtype="float32",
            shape=[5, 10],
            lod_level=0,
            name="x",
            optimize_attr={'learning_rate': 1.0})
        x.value().get_tensor().set(
            np.random.random((5, 10)).astype('float32'),
            paddle.fluid.framework._current_expected_place())

        y = paddle.ones([5, 10])
        z = x + y
        opt = optimizer.Adam(learning_rate=self.lr, parameters=[x])

        z.backward()
        opt.step()

    def test_create_param_lr_with_no_1_value_for_coverage(self):
        x = paddle.fluid.framework.ParamBase(
            dtype="float32",
            shape=[5, 10],
            lod_level=0,
            name="x",
            optimize_attr={'learning_rate': 0.12})
        x.value().get_tensor().set(
            np.random.random((5, 10)).astype('float32'),
            paddle.fluid.framework._current_expected_place())

        y = paddle.ones([5, 10])
        z = x + y
        opt = optimizer.Adam(learning_rate=self.lr, parameters=[x])

        z.backward()
        opt.step()


if __name__ == "__main__":
    unittest.main()
......@@ -100,8 +100,19 @@ class Optimizer(object):
weight_decay=None,
grad_clip=None,
name=None):
self._parameter_list = list(
parameters) if parameters is not None else None
if parameters is not None:
# paddle.Tensor is also iterable, so checking whether the input is
# iterable is not enough here; if the input is a single paddle.Tensor,
# list(paddle.Tensor) would produce an incorrect value
if isinstance(parameters, paddle.Tensor):
raise TypeError(
"`parameters` argument given to the optimizer should be "
"an iterable of paddle Tensors, but the argument received is of "
"type `{}`.".format(type(parameters)))
self._parameter_list = list(parameters)
else:
self._parameter_list = None
self._name = name
if framework.in_dygraph_mode():
if self._parameter_list is None:
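
A hedged sketch of the behavior guarded by the `isinstance(parameters, paddle.Tensor)` check above (matching `test_optimizer_with_varbase_input` in the new test file): passing a bare Tensor is rejected, while wrapping it in a list is the supported form.

import paddle

x = paddle.zeros([2, 3])
x.stop_gradient = False

# A bare Tensor is itself iterable, so it is rejected explicitly
# rather than being silently unpacked by list(parameters).
try:
    paddle.optimizer.Adam(learning_rate=0.01, parameters=x)
except TypeError as e:
    print(e)

# The supported form: an iterable (here a list) of Tensors.
opt = paddle.optimizer.Adam(learning_rate=0.01, parameters=[x])
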
......@@ -110,7 +121,8 @@ class Optimizer(object):
)
if weight_decay is not None:
for param in self._parameter_list:
if param.regularizer is not None:
if hasattr(param,
'regularizer') and param.regularizer is not None:
logging.info(
"If regularizer of a Parameter has been set by 'paddle.ParamAttr' or 'static.WeightNormParamAttr' already. "
"The weight_decay[%s] in Optimizer will not take effect, and it will only be applied to other Parameters!"
......@@ -433,17 +445,20 @@ class Optimizer(object):
def _create_param_lr(self, param_and_grad):
# create learning rate tensor for every parameter
param = param_and_grad[0]
param_lr = param.optimize_attr['learning_rate']
if type(param_lr) == Variable:
return param_lr
else:
if param_lr == 1.0:
return self._global_learning_rate()
if hasattr(param, 'optimize_attr'):
param_lr = param.optimize_attr['learning_rate']
if type(param_lr) == Variable:
return param_lr
else:
with default_main_program()._lr_schedule_guard(
is_with_opt=True), framework.name_scope(
'scale_with_param_lr'):
return self._global_learning_rate() * param_lr
if param_lr == 1.0:
return self._global_learning_rate()
else:
with default_main_program()._lr_schedule_guard(
is_with_opt=True), framework.name_scope(
'scale_with_param_lr'):
return self._global_learning_rate() * param_lr
else:
return self._global_learning_rate()
def _create_accumulators(self, block, parameters):
"""Create all accumulators needed by the parameters
......
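
To illustrate why `_create_param_lr` now falls back to the global learning rate when `optimize_attr` is absent, a small sketch (the `ParamBase` construction is copied from the coverage tests above; the printed values are assumptions about this Paddle version): a plain dygraph Tensor carries no `optimize_attr`, while a `ParamBase` keeps its per-parameter learning-rate multiplier.

import paddle

# A plain dygraph Tensor has no 'optimize_attr', so the optimizer
# uses the global learning rate for it.
x = paddle.zeros([2, 3])
print(hasattr(x, 'optimize_attr'))  # expected: False

# A ParamBase created with an explicit optimize_attr keeps a
# per-parameter multiplier that scales the global learning rate.
p = paddle.fluid.framework.ParamBase(
    dtype="float32",
    shape=[5, 10],
    lod_level=0,
    name="p",
    optimize_attr={'learning_rate': 0.12})
print(p.optimize_attr['learning_rate'])  # expected: 0.12
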