Unverified commit b47dd158, authored by Chen Weihang, committed by GitHub

[HotFix] Add support for optimizer with varbase input (#32362)

* add support for optimizer with varbase input

* refine cond

* fix failed unittest

* add test for coverage
Parent c3158527
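
For context, a minimal sketch of the usage this commit enables, mirroring the unit test added below: a plain dygraph Tensor (VarBase) can now be passed directly in the `parameters` list of a `paddle.optimizer` optimizer.

import numpy as np
import paddle

# A leaf tensor used directly as a trainable parameter (a VarBase in dygraph mode).
x = paddle.zeros([2, 3])
x.stop_gradient = False
y = paddle.ones([2, 3])

z = x + y
opt = paddle.optimizer.SGD(learning_rate=0.01, parameters=[x])
z.backward()
opt.step()

# dz/dx is all ones, so one SGD step moves x to -learning_rate everywhere.
assert np.allclose(x.numpy(), np.full([2, 3], -0.01))
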
......@@ -28,10 +28,12 @@ def _create_regularization_of_grad(param, grad, regularization=None):
Function helper of append_regularization_ops.
"""
# If no gradient or no regularization is specified, then we don't need to do anything
if grad is None or (param.regularizer is None and regularization is None):
if grad is None or ((not hasattr(param, 'regularizer') or (
hasattr(param, 'regularizer') and param.regularizer is None)) and
regularization is None):
return grad
regularization_term = None
if param.regularizer is not None:
if hasattr(param, 'regularizer') and param.regularizer is not None:
# Add variable for regularization term in grad block
regularization_term = param.regularizer(param, grad, grad.block)
elif regularization is not None:
......@@ -213,7 +215,7 @@ class L2DecayRegularizer(WeightDecayRegularizer):
Returns:
new variable for weight decay
"""
assert isinstance(param, framework.Parameter)
assert isinstance(param, framework.Variable)
assert isinstance(block, framework.Block)
inputs = {"X": [param]}
......@@ -320,7 +322,7 @@ class L1DecayRegularizer(WeightDecayRegularizer):
Returns:
new variable for weight decay
"""
assert isinstance(param, framework.Parameter)
assert isinstance(param, framework.Variable)
assert isinstance(block, framework.Block)
if framework.in_dygraph_mode():
......
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import numpy as np
import unittest
import paddle
import paddle.optimizer as optimizer


class TestOptimizerForVarBase(unittest.TestCase):
    def setUp(self):
        self.lr = 0.01

    def run_optimizer_step_with_varbase_list_input(self, optimizer):
        x = paddle.zeros([2, 3])
        y = paddle.ones([2, 3])
        x.stop_gradient = False

        z = x + y

        opt = optimizer(
            learning_rate=self.lr, parameters=[x], weight_decay=0.01)

        z.backward()
        opt.step()

        self.assertTrue(np.allclose(x.numpy(), np.full([2, 3], -self.lr)))

    def run_optimizer_minimize_with_varbase_list_input(self, optimizer):
        x = paddle.zeros([2, 3])
        y = paddle.ones([2, 3])
        x.stop_gradient = False

        z = x + y

        opt = optimizer(learning_rate=self.lr, parameters=[x])

        z.backward()
        opt.minimize(z)

        self.assertTrue(np.allclose(x.numpy(), np.full([2, 3], -self.lr)))

    def test_adam_with_varbase_list_input(self):
        self.run_optimizer_step_with_varbase_list_input(optimizer.Adam)
        self.run_optimizer_minimize_with_varbase_list_input(optimizer.Adam)

    def test_sgd_with_varbase_list_input(self):
        self.run_optimizer_step_with_varbase_list_input(optimizer.SGD)
        self.run_optimizer_minimize_with_varbase_list_input(optimizer.SGD)

    def test_adagrad_with_varbase_list_input(self):
        self.run_optimizer_step_with_varbase_list_input(optimizer.Adagrad)
        self.run_optimizer_minimize_with_varbase_list_input(optimizer.Adagrad)

    def test_adamw_with_varbase_list_input(self):
        self.run_optimizer_step_with_varbase_list_input(optimizer.AdamW)
        self.run_optimizer_minimize_with_varbase_list_input(optimizer.AdamW)

    def test_adamax_with_varbase_list_input(self):
        self.run_optimizer_step_with_varbase_list_input(optimizer.Adamax)
        self.run_optimizer_minimize_with_varbase_list_input(optimizer.Adamax)

    def test_momentum_with_varbase_list_input(self):
        self.run_optimizer_step_with_varbase_list_input(optimizer.Momentum)
        self.run_optimizer_minimize_with_varbase_list_input(optimizer.Momentum)

    def test_optimizer_with_varbase_input(self):
        x = paddle.zeros([2, 3])
        with self.assertRaises(TypeError):
            optimizer.Adam(learning_rate=self.lr, parameters=x)

    def test_create_param_lr_with_1_for_coverage(self):
        x = paddle.fluid.framework.ParamBase(
            dtype="float32",
            shape=[5, 10],
            lod_level=0,
            name="x",
            optimize_attr={'learning_rate': 1.0})
        x.value().get_tensor().set(
            np.random.random((5, 10)).astype('float32'),
            paddle.fluid.framework._current_expected_place())

        y = paddle.ones([5, 10])
        z = x + y
        opt = optimizer.Adam(learning_rate=self.lr, parameters=[x])

        z.backward()
        opt.step()

    def test_create_param_lr_with_no_1_value_for_coverage(self):
        x = paddle.fluid.framework.ParamBase(
            dtype="float32",
            shape=[5, 10],
            lod_level=0,
            name="x",
            optimize_attr={'learning_rate': 0.12})
        x.value().get_tensor().set(
            np.random.random((5, 10)).astype('float32'),
            paddle.fluid.framework._current_expected_place())

        y = paddle.ones([5, 10])
        z = x + y
        opt = optimizer.Adam(learning_rate=self.lr, parameters=[x])

        z.backward()
        opt.step()


if __name__ == "__main__":
    unittest.main()
......@@ -100,8 +100,19 @@ class Optimizer(object):
weight_decay=None,
grad_clip=None,
name=None):
self._parameter_list = list(
parameters) if parameters is not None else None
if parameters is not None:
# paddle.Tensor is also iterable, so checking whether the input is
# iterable is not enough here; if the input is a single paddle.Tensor,
# list(paddle.Tensor) would produce an incorrect value
if isinstance(parameters, paddle.Tensor):
raise TypeError(
"`parameters` argument given to the optimizer should be "
"an iterable of paddle Tensors, but the argument received is of "
"type `{}`.".format(type(parameters)))
self._parameter_list = list(parameters)
else:
self._parameter_list = None
self._name = name
if framework.in_dygraph_mode():
if self._parameter_list is None:
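
A hedged sketch of the behavior guarded by the `isinstance(parameters, paddle.Tensor)` check above (matching `test_optimizer_with_varbase_input` in the new test file): passing a bare Tensor is rejected, while wrapping it in a list is the supported form.

import paddle

x = paddle.zeros([2, 3])
x.stop_gradient = False

# A bare Tensor is itself iterable, so it is rejected explicitly
# rather than being silently unpacked by list(parameters).
try:
    paddle.optimizer.Adam(learning_rate=0.01, parameters=x)
except TypeError as e:
    print(e)

# The supported form: an iterable (here a list) of Tensors.
opt = paddle.optimizer.Adam(learning_rate=0.01, parameters=[x])
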
......@@ -110,7 +121,8 @@ class Optimizer(object):
)
if weight_decay is not None:
for param in self._parameter_list:
if param.regularizer is not None:
if hasattr(param,
'regularizer') and param.regularizer is not None:
logging.info(
"If regularizer of a Parameter has been set by 'paddle.ParamAttr' or 'static.WeightNormParamAttr' already. "
"The weight_decay[%s] in Optimizer will not take effect, and it will only be applied to other Parameters!"
......@@ -433,17 +445,20 @@ class Optimizer(object):
def _create_param_lr(self, param_and_grad):
# create learning rate tensor for every parameter
param = param_and_grad[0]
param_lr = param.optimize_attr['learning_rate']
if type(param_lr) == Variable:
return param_lr
else:
if param_lr == 1.0:
return self._global_learning_rate()
if hasattr(param, 'optimize_attr'):
param_lr = param.optimize_attr['learning_rate']
if type(param_lr) == Variable:
return param_lr
else:
with default_main_program()._lr_schedule_guard(
is_with_opt=True), framework.name_scope(
'scale_with_param_lr'):
return self._global_learning_rate() * param_lr
if param_lr == 1.0:
return self._global_learning_rate()
else:
with default_main_program()._lr_schedule_guard(
is_with_opt=True), framework.name_scope(
'scale_with_param_lr'):
return self._global_learning_rate() * param_lr
else:
return self._global_learning_rate()
def _create_accumulators(self, block, parameters):
"""Create all accumulators needed by the parameters
......
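
To illustrate why `_create_param_lr` now falls back to the global learning rate when `optimize_attr` is absent, a small sketch (the `ParamBase` construction is copied from the coverage tests above; the printed values are assumptions about this Paddle version): a plain dygraph Tensor carries no `optimize_attr`, while a `ParamBase` keeps its per-parameter learning-rate multiplier.

import paddle

# A plain dygraph Tensor has no 'optimize_attr', so the optimizer
# uses the global learning rate for it.
x = paddle.zeros([2, 3])
print(hasattr(x, 'optimize_attr'))  # expected: False

# A ParamBase created with an explicit optimize_attr keeps a
# per-parameter multiplier that scales the global learning rate.
p = paddle.fluid.framework.ParamBase(
    dtype="float32",
    shape=[5, 10],
    lod_level=0,
    name="p",
    optimize_attr={'learning_rate': 0.12})
print(p.optimize_attr['learning_rate'])  # expected: 0.12
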