From fed058857193d496e69fc27d511348e434d35977 Mon Sep 17 00:00:00 2001
From: hong <43953930+phlrain@users.noreply.github.com>
Date: Wed, 8 Jul 2020 15:40:32 +0800
Subject: [PATCH] Fix parameter list iterator bug (#25089)

* fix optimizer parameter is a iterator; test=develop

* fix parameter list None bug; test=develop

* use is not None; test=develop

* change list to iterable; test=develop
---
 python/paddle/fluid/optimizer.py               | 33 ++++++++++---------
 .../unittests/test_imperative_optimizer.py     | 26 +++++++++++++++
 2 files changed, 43 insertions(+), 16 deletions(-)

diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py
index 270208120c..ec09e6065b 100644
--- a/python/paddle/fluid/optimizer.py
+++ b/python/paddle/fluid/optimizer.py
@@ -67,7 +67,8 @@ class Optimizer(object):
                  regularization=None,
                  grad_clip=None,
                  name=None):
-        self._parameter_list = parameter_list
+        self._parameter_list = list(
+            parameter_list) if parameter_list is not None else None
         self._name = name
         if framework.in_dygraph_mode():
             if not isinstance(learning_rate, float) and \
@@ -705,7 +706,7 @@ class Optimizer(object):
             startup_program (Program, optional): :ref:`api_fluid_Program` for
                 initializing parameters in ``parameter_list``. The default value
                 is None, at this time :ref:`api_fluid_default_startup_program` will be used.
-            parameter_list (list, optional): List of ``Variable`` or ``Variable.name`` to update
+            parameter_list (Iterable, optional): Iterable of ``Variable`` or ``Variable.name`` to update
                 to minimize ``loss``. The default value is None, at this time all parameters
                 will be updated.
             no_grad_set (set, optional): Set of ``Variable`` or ``Variable.name`` that don't need
@@ -882,7 +883,7 @@
             startup_program (Program, optional): :ref:`api_fluid_Program` for
                 initializing parameters in ``parameter_list``. The default value
                 is None, at this time :ref:`api_fluid_default_startup_program` will be used.
-            parameter_list (list, optional): List of ``Variable`` or ``Variable.name`` to update
+            parameter_list (Iterable, optional): Iterable of ``Variable`` or ``Variable.name`` to update
                 to minimize ``loss``. The default value is None, at this time all parameters
                 will be updated.
             no_grad_set (set, optional): Set of ``Variable`` or ``Variable.name`` that don't need
@@ -926,7 +927,7 @@ class SGDOptimizer(Optimizer):
     Parameters:
         learning_rate (float|Variable): The learning rate used to update parameters. \
             Can be a float value or a Variable with one float value as data element.
-        parameter_list (list, optional): List of ``Variable`` names to update to minimize ``loss``. \
+        parameter_list (Iterable, optional): Iterable of ``Variable`` names to update to minimize ``loss``. \
             This parameter is required in dygraph mode. \
             The default value is None in static mode, at this time all parameters will be updated.
         regularization (WeightDecayRegularizer, optional): The strategy of regularization. There are two method: \
@@ -1034,7 +1035,7 @@ class MomentumOptimizer(Optimizer):
         learning_rate (float|Variable): The learning rate used to update parameters. \
             Can be a float value or a Variable with one float value as data element.
         momentum (float): Momentum factor
-        parameter_list (list, optional): List of ``Variable`` names to update to minimize ``loss``. \
+        parameter_list (Iterable, optional): Iterable of ``Variable`` names to update to minimize ``loss``. \
             This parameter is required in dygraph mode. \
             The default value is None in static mode, at this time all parameters will be updated.
         use_nesterov (bool, optional): Enables Nesterov momentum, default is false.
@@ -1182,7 +1183,7 @@ class DGCMomentumOptimizer(Optimizer):
         sparsity (list[float]): Get top important element from gradient tensor, the ratio is (1 - current sparsity). \
             Default is [0.999]. For example, if the sparsity is [0.99, 0.999], \
             the top [1%, 0.1%] important element will be transmitted.
-        parameter_list (list, optional): List of ``Variable`` names to update to minimize ``loss``. \
+        parameter_list (Iterable, optional): Iterable of ``Variable`` names to update to minimize ``loss``. \
             This parameter is required in dygraph mode. \
             The default value is None in static mode, at this time all parameters will be updated.
         use_nesterov (bool): Enables Nesterov momentum. True means use Nesterov. Default is False.
@@ -1580,7 +1581,7 @@ class LarsMomentumOptimizer(Optimizer):
         momentum (float): momentum factor
         lars_coeff (float): Defines how much we trust the layer to change its weights.
         lars_weight_decay (float): Weight decay coefficient for decaying using LARS.
-        parameter_list (list, optional): List of ``Variable`` names to update to minimize ``loss``. \
+        parameter_list (Iterable, optional): Iterable of ``Variable`` names to update to minimize ``loss``. \
             This parameter is required in dygraph mode. \
             The default value is None in static mode, at this time all parameters will be updated.
         regularization (WeightDecayRegularizer, optional): The strategy of regularization. There are two method: \
@@ -1699,7 +1700,7 @@ class AdagradOptimizer(Optimizer):
             It can be a float value or a ``Variable`` with a float type.
         epsilon (float, optional): A small float value for numerical stability.
             The default value is 1e-06.
-        parameter_list (list, optional): List of ``Variable`` names to update to minimize ``loss``. \
+        parameter_list (Iterable, optional): Iterable of ``Variable`` names to update to minimize ``loss``. \
             This parameter is required in dygraph mode. \
             The default value is None in static mode, at this time all parameters will be updated.
         regularization (WeightDecayRegularizer, optional): The strategy of regularization. There are two method: \
@@ -1824,7 +1825,7 @@ class AdamOptimizer(Optimizer):
             The default value is 0.999.
         epsilon (float, optional): A small float value for numerical stability.
             The default value is 1e-08.
-        parameter_list (list, optional): List of ``Variable`` names to update to minimize ``loss``. \
+        parameter_list (Iterable, optional): Iterable of ``Variable`` names to update to minimize ``loss``. \
             This parameter is required in dygraph mode. \
             The default value is None in static mode, at this time all parameters will be updated.
         regularization (WeightDecayRegularizer, optional): The strategy of regularization. There are two method: \
@@ -2090,7 +2091,7 @@ class AdamaxOptimizer(Optimizer):
             The default value is 0.999.
         epsilon (float, optional): A small float value for numerical stability.
             The default value is 1e-08.
-        parameter_list (list, optional): List of ``Variable`` names to update to minimize ``loss``. \
+        parameter_list (Iterable, optional): Iterable of ``Variable`` names to update to minimize ``loss``. \
             This parameter is required in dygraph mode. \
             The default value is None in static mode, at this time all parameters will be updated.
         regularization (WeightDecayRegularizer, optional): The strategy of regularization. There are two method: \
@@ -2265,7 +2266,7 @@ class DpsgdOptimizer(Optimizer):
         clip (float): clipping threshold
         batch_size (float): batch size.
         sigma (float): for gaussian noise.
-        parameter_list (list, optional): List of ``Variable`` names to update to minimize ``loss``. \
+        parameter_list (Iterable, optional): Iterable of ``Variable`` names to update to minimize ``loss``. \
             This parameter is required in dygraph mode. \
             The default value is None in static mode, at this time all parameters will be updated.
     Notes:
@@ -2348,7 +2349,7 @@ class DecayedAdagradOptimizer(Optimizer):
         decay (float, optional): The decay rate. The default value is 0.95.
         epsilon (float, optional): A small float value for numerical stability.
             The default value is 1e-06.
-        parameter_list (list, optional): List of ``Variable`` names to update to minimize ``loss``. \
+        parameter_list (Iterable, optional): Iterable of ``Variable`` names to update to minimize ``loss``. \
             This parameter is required in dygraph mode. \
             The default value is None in static mode, at this time all parameters will be updated.
         regularization (WeightDecayRegularizer, optional): The strategy of regularization. There are two method: \
@@ -2453,7 +2454,7 @@ class AdadeltaOptimizer(Optimizer):
         learning_rate (float|Variable): global learning rate.
         epsilon (float): a small float number for numeric stability. Default 1.0e-6.
         rho (float): a floating point value indicating the decay rate. Default 0.95.
-        parameter_list (list, optional): List of ``Variable`` names to update to minimize ``loss``. \
+        parameter_list (Iterable, optional): Iterable of ``Variable`` names to update to minimize ``loss``. \
             This parameter is required in dygraph mode. \
             The default value is None in static mode, at this time all parameters will be updated.
         regularization (WeightDecayRegularizer, optional): The strategy of regularization. There are two method: \
@@ -2610,7 +2611,7 @@ class RMSPropOptimizer(Optimizer):
             the gradient; if False, by the uncentered second moment. Setting this to
             True may help with training, but is slightly more expensive in terms of
             computation and memory. Defaults to False.
-        parameter_list (list, optional): List of ``Variable`` names to update to minimize ``loss``. \
+        parameter_list (Iterable, optional): Iterable of ``Variable`` names to update to minimize ``loss``. \
            This parameter is required in dygraph mode. \
            The default value is None in static mode, at this time all parameters will be updated.
        regularization (WeightDecayRegularizer, optional): The strategy of regularization. There are two method: \
@@ -2784,7 +2785,7 @@ class FtrlOptimizer(Optimizer):
         l1 (float): L1 regularization strength, default is 0.0.
         l2 (float): L2 regularization strength, default is 0.0.
         lr_power (float): Learning Rate Power, default is -0.5.
-        parameter_list (list, optional): List of ``Variable`` names to update to minimize ``loss``. \
+        parameter_list (Iterable, optional): Iterable of ``Variable`` names to update to minimize ``loss``. \
             This parameter is required in dygraph mode. \
             The default value is None in static mode, at this time all parameters will be updated.
         regularization (WeightDecayRegularizer, optional): The strategy of regularization. There are two method: \
@@ -2932,7 +2933,7 @@ class LambOptimizer(AdamOptimizer):
         beta2 (float, optional): The exponential decay rate for the 2nd moment estimates.
             Default 0.999.
         epsilon (float, optional): A small float value for numerical stability. Default 1e-6.
-        parameter_list (list, optional): List of ``Variable`` names to update to minimize ``loss``. \
+        parameter_list (Iterable, optional): Iterable of ``Variable`` names to update to minimize ``loss``. \
             This parameter is required in dygraph mode. \
             The default value is None in static mode, at this time all parameters will be updated.
         regularization (WeightDecayRegularizer, optional): The strategy of regularization. There are two method: \
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py b/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py
index d3017c31c7..a7783afc5c 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py
@@ -18,6 +18,7 @@ import contextlib
 import unittest
 import numpy as np
 import six
+import itertools
 
 import paddle
 import paddle.fluid as fluid
@@ -699,5 +700,30 @@ class TestImperativeRecomputeOptimizer(TestImperativeOptimizerBase):
         self._check_exception(exception_message)
 
 
+class TestImperativeOptimizerList(unittest.TestCase):
+    def test_parameter_list(self):
+        with fluid.dygraph.guard():
+            linear_1 = Linear(10, 10)
+            linear_2 = Linear(10, 10)
+
+            sgd = SGDOptimizer(
+                1.0,
+                parameter_list=itertools.chain(linear_1.parameters(),
+                                               linear_2.parameters()))
+
+            in_np = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")
+            in_data = fluid.dygraph.to_variable(in_np)
+
+            y = linear_1(in_data)
+            y = linear_2(y)
+            loss = fluid.layers.reduce_mean(y)
+            loss.backward()
+            sgd.minimize(loss)
+
+            self.assertTrue(
+                len(sgd._parameter_list) ==
+                len(linear_1.parameters() + linear_2.parameters()))
+
+
 if __name__ == '__main__':
     unittest.main()
--
GitLab
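
The core of this patch is the two added lines in Optimizer.__init__: the incoming parameter_list is copied into a plain list once (while None stays None, so "update all parameters" still applies in static mode), which means a generator or other one-shot iterator passed by the caller is no longer silently exhausted after its first traversal. Below is a minimal standalone sketch of that failure mode in plain Python; it is not part of the patch, and ToyOptimizer is a hypothetical stand-in for fluid.optimizer.Optimizer.

# Standalone sketch (not part of the patch) of the bug the fix guards against.
# ``ToyOptimizer`` is a hypothetical stand-in for ``fluid.optimizer.Optimizer``.
import itertools


class ToyOptimizer(object):
    def __init__(self, parameter_list=None):
        # Same idea as the fixed __init__: materialize the iterable once,
        # keeping None as None.
        self._parameter_list = list(
            parameter_list) if parameter_list is not None else None

    def step(self):
        # Without the list() copy, a one-shot iterator would already be
        # exhausted here on the second call and nothing would be updated.
        return [p for p in self._parameter_list]


group_1 = ["w1", "b1"]
group_2 = ["w2", "b2"]

# itertools.chain returns a one-shot iterator, just like the chained
# Linear.parameters() calls in the new unit test.
opt = ToyOptimizer(parameter_list=itertools.chain(group_1, group_2))

assert opt.step() == ["w1", "b1", "w2", "b2"]
assert opt.step() == ["w1", "b1", "w2", "b2"]  # still populated on reuse

The new TestImperativeOptimizerList case exercises the same path in dygraph mode: it chains the parameters of two Linear layers with itertools.chain, runs one minimize step, and checks that the materialized _parameter_list contains as many entries as the two layers' parameter lists combined.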