From b47dd1585a2e42325540667590d5d6463e87e958 Mon Sep 17 00:00:00 2001
From: Chen Weihang
Date: Wed, 21 Apr 2021 20:56:00 +0800
Subject: [PATCH] [HotFix] Add support for optimizer with varbase input
 (#32362)

* add support for optimizer with varbase input

* refine cond

* fix failed unittest

* add test for coverage
---
 python/paddle/fluid/regularizer.py            |  10 +-
 .../unittests/test_optimizer_for_varbase.py   | 122 ++++++++++++++++++
 python/paddle/optimizer/optimizer.py          |  41 ++++--
 3 files changed, 156 insertions(+), 17 deletions(-)
 create mode 100644 python/paddle/fluid/tests/unittests/test_optimizer_for_varbase.py

diff --git a/python/paddle/fluid/regularizer.py b/python/paddle/fluid/regularizer.py
index 5e0e5f724a..db08955c45 100644
--- a/python/paddle/fluid/regularizer.py
+++ b/python/paddle/fluid/regularizer.py
@@ -28,10 +28,12 @@ def _create_regularization_of_grad(param, grad, regularization=None):
     Function helper of append_regularization_ops.
     """
     # If no gradient or no regularization is specified, then we don't need to do anything
-    if grad is None or (param.regularizer is None and regularization is None):
+    if grad is None or ((not hasattr(param, 'regularizer') or (
+            hasattr(param, 'regularizer') and param.regularizer is None)) and
+                        regularization is None):
         return grad
     regularization_term = None
-    if param.regularizer is not None:
+    if hasattr(param, 'regularizer') and param.regularizer is not None:
         # Add variable for regularization term in grad block
         regularization_term = param.regularizer(param, grad, grad.block)
     elif regularization is not None:
@@ -213,7 +215,7 @@ class L2DecayRegularizer(WeightDecayRegularizer):
         Returns:
             new variable for weight decay
         """
-        assert isinstance(param, framework.Parameter)
+        assert isinstance(param, framework.Variable)
         assert isinstance(block, framework.Block)
 
         inputs = {"X": [param]}
@@ -320,7 +322,7 @@ class L1DecayRegularizer(WeightDecayRegularizer):
         Returns:
            new variable for weight decay
         """
-        assert isinstance(param, framework.Parameter)
+        assert isinstance(param, framework.Variable)
         assert isinstance(block, framework.Block)
 
         if framework.in_dygraph_mode():
diff --git a/python/paddle/fluid/tests/unittests/test_optimizer_for_varbase.py b/python/paddle/fluid/tests/unittests/test_optimizer_for_varbase.py
new file mode 100644
index 0000000000..8fdedce224
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/test_optimizer_for_varbase.py
@@ -0,0 +1,122 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import numpy as np
+import unittest
+
+import paddle
+import paddle.optimizer as optimizer
+
+
+class TestOptimizerForVarBase(unittest.TestCase):
+    def setUp(self):
+        self.lr = 0.01
+
+    def run_optimizer_step_with_varbase_list_input(self, optimizer):
+        x = paddle.zeros([2, 3])
+        y = paddle.ones([2, 3])
+        x.stop_gradient = False
+
+        z = x + y
+
+        opt = optimizer(
+            learning_rate=self.lr, parameters=[x], weight_decay=0.01)
+
+        z.backward()
+        opt.step()
+
+        self.assertTrue(np.allclose(x.numpy(), np.full([2, 3], -self.lr)))
+
+    def run_optimizer_minimize_with_varbase_list_input(self, optimizer):
+        x = paddle.zeros([2, 3])
+        y = paddle.ones([2, 3])
+        x.stop_gradient = False
+
+        z = x + y
+
+        opt = optimizer(learning_rate=self.lr, parameters=[x])
+
+        z.backward()
+        opt.minimize(z)
+
+        self.assertTrue(np.allclose(x.numpy(), np.full([2, 3], -self.lr)))
+
+    def test_adam_with_varbase_list_input(self):
+        self.run_optimizer_step_with_varbase_list_input(optimizer.Adam)
+        self.run_optimizer_minimize_with_varbase_list_input(optimizer.Adam)
+
+    def test_sgd_with_varbase_list_input(self):
+        self.run_optimizer_step_with_varbase_list_input(optimizer.SGD)
+        self.run_optimizer_minimize_with_varbase_list_input(optimizer.SGD)
+
+    def test_adagrad_with_varbase_list_input(self):
+        self.run_optimizer_step_with_varbase_list_input(optimizer.Adagrad)
+        self.run_optimizer_minimize_with_varbase_list_input(optimizer.Adagrad)
+
+    def test_adamw_with_varbase_list_input(self):
+        self.run_optimizer_step_with_varbase_list_input(optimizer.AdamW)
+        self.run_optimizer_minimize_with_varbase_list_input(optimizer.AdamW)
+
+    def test_adamax_with_varbase_list_input(self):
+        self.run_optimizer_step_with_varbase_list_input(optimizer.Adamax)
+        self.run_optimizer_minimize_with_varbase_list_input(optimizer.Adamax)
+
+    def test_momentum_with_varbase_list_input(self):
+        self.run_optimizer_step_with_varbase_list_input(optimizer.Momentum)
+        self.run_optimizer_minimize_with_varbase_list_input(optimizer.Momentum)
+
+    def test_optimizer_with_varbase_input(self):
+        x = paddle.zeros([2, 3])
+        with self.assertRaises(TypeError):
+            optimizer.Adam(learning_rate=self.lr, parameters=x)
+
+    def test_create_param_lr_with_1_for_coverage(self):
+        x = paddle.fluid.framework.ParamBase(
+            dtype="float32",
+            shape=[5, 10],
+            lod_level=0,
+            name="x",
+            optimize_attr={'learning_rate': 1.0})
+        x.value().get_tensor().set(
+            np.random.random((5, 10)).astype('float32'),
+            paddle.fluid.framework._current_expected_place())
+
+        y = paddle.ones([5, 10])
+        z = x + y
+        opt = optimizer.Adam(learning_rate=self.lr, parameters=[x])
+        z.backward()
+        opt.step()
+
+    def test_create_param_lr_with_no_1_value_for_coverage(self):
+        x = paddle.fluid.framework.ParamBase(
+            dtype="float32",
+            shape=[5, 10],
+            lod_level=0,
+            name="x",
+            optimize_attr={'learning_rate': 0.12})
+        x.value().get_tensor().set(
+            np.random.random((5, 10)).astype('float32'),
+            paddle.fluid.framework._current_expected_place())
+
+        y = paddle.ones([5, 10])
+        z = x + y
+        opt = optimizer.Adam(learning_rate=self.lr, parameters=[x])
+        z.backward()
+        opt.step()
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/python/paddle/optimizer/optimizer.py b/python/paddle/optimizer/optimizer.py
index b37d172606..0d44df5901 100644
--- a/python/paddle/optimizer/optimizer.py
+++ b/python/paddle/optimizer/optimizer.py
@@ -100,8 +100,19 @@ class Optimizer(object):
                  weight_decay=None,
                  grad_clip=None,
                  name=None):
-        self._parameter_list = list(
-            parameters) if parameters is not None else None
+        if parameters is not None:
+            # paddle.Tensor is also iterable, so here we don't check whether
+            # the input is iterable, if the input is paddle.Tensor, the
+            # list(paddle.Tensor) will be a error value
+            if isinstance(parameters, paddle.Tensor):
+                raise TypeError(
+                    "`parameters` argument given to the optimizer should be "
+                    "an iterable of paddle Tensors, but got argument type is `{}`.".
+                    format(type(parameters)))
+            self._parameter_list = list(parameters)
+        else:
+            self._parameter_list = None
+
         self._name = name
         if framework.in_dygraph_mode():
             if self._parameter_list is None:
@@ -110,7 +121,8 @@ class Optimizer(object):
                 )
             if weight_decay is not None:
                 for param in self._parameter_list:
-                    if param.regularizer is not None:
+                    if hasattr(param,
+                               'regularizer') and param.regularizer is not None:
                         logging.info(
                             "If regularizer of a Parameter has been set by 'paddle.ParamAttr' or 'static.WeightNormParamAttr' already. "
                             "The weight_decay[%s] in Optimizer will not take effect, and it will only be applied to other Parameters!"
@@ -433,17 +445,20 @@ class Optimizer(object):
     def _create_param_lr(self, param_and_grad):
         # create learning rate tensor for every parameter
         param = param_and_grad[0]
-        param_lr = param.optimize_attr['learning_rate']
-        if type(param_lr) == Variable:
-            return param_lr
-        else:
-            if param_lr == 1.0:
-                return self._global_learning_rate()
+        if hasattr(param, 'optimize_attr'):
+            param_lr = param.optimize_attr['learning_rate']
+            if type(param_lr) == Variable:
+                return param_lr
             else:
-                with default_main_program()._lr_schedule_guard(
-                    is_with_opt=True), framework.name_scope(
-                        'scale_with_param_lr'):
-                    return self._global_learning_rate() * param_lr
+                if param_lr == 1.0:
+                    return self._global_learning_rate()
+                else:
+                    with default_main_program()._lr_schedule_guard(
+                        is_with_opt=True), framework.name_scope(
+                            'scale_with_param_lr'):
+                        return self._global_learning_rate() * param_lr
+        else:
+            return self._global_learning_rate()
 
     def _create_accumulators(self, block, parameters):
         """Create all accumulators needed by the parameters
-- 
GitLab
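For reference, a minimal usage sketch of the behavior this patch enables, not part of the patch itself and assuming the Paddle 2.x dygraph API: an optimizer can now be constructed over plain paddle.Tensor (VarBase) objects that are not framework.Parameter instances, while a bare Tensor passed instead of an iterable is rejected with TypeError.

# Illustrative sketch only (not from the patch); assumes Paddle 2.x dygraph mode.
import paddle

# A plain Tensor (VarBase) with gradients enabled; it is not a framework.Parameter,
# so it carries no `regularizer` or `optimize_attr` attribute.
x = paddle.zeros([2, 3])
x.stop_gradient = False
loss = (x + paddle.ones([2, 3])).sum()

# A list of plain Tensors is now accepted as `parameters`.
opt = paddle.optimizer.SGD(learning_rate=0.01, parameters=[x])
loss.backward()
opt.step()  # x is updated to -learning_rate everywhere, as the new unit test asserts

# Passing a bare Tensor instead of an iterable raises TypeError after this patch.
try:
    paddle.optimizer.SGD(learning_rate=0.01, parameters=x)
except TypeError as err:
    print(err)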