From fed058857193d496e69fc27d511348e434d35977 Mon Sep 17 00:00:00 2001
From: hong <43953930+phlrain@users.noreply.github.com>
Date: Wed, 8 Jul 2020 15:40:32 +0800
Subject: [PATCH] Fix parameter list iterator bug (#25089)

* fix optimizer parameter is a iterator; test=develop

* fix parameter list None bug; test=develop

* use is not None; test=develop

* change list to iterable; test=develop
---
 python/paddle/fluid/optimizer.py               | 33 ++++++++++---------
 .../unittests/test_imperative_optimizer.py     | 26 +++++++++++++++
 2 files changed, 43 insertions(+), 16 deletions(-)

diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py
index 270208120c..ec09e6065b 100644
--- a/python/paddle/fluid/optimizer.py
+++ b/python/paddle/fluid/optimizer.py
@@ -67,7 +67,8 @@ class Optimizer(object):
                  regularization=None,
                  grad_clip=None,
                  name=None):
-        self._parameter_list = parameter_list
+        self._parameter_list = list(
+            parameter_list) if parameter_list is not None else None
         self._name = name
         if framework.in_dygraph_mode():
             if not isinstance(learning_rate, float) and \
@@ -705,7 +706,7 @@ class Optimizer(object):
             startup_program (Program, optional): :ref:`api_fluid_Program` for
                 initializing parameters in ``parameter_list``. The default value
                 is None, at this time :ref:`api_fluid_default_startup_program` will be used.
-            parameter_list (list, optional): List of ``Variable`` or ``Variable.name`` to update
+            parameter_list (Iterable, optional): Iterable of ``Variable`` or ``Variable.name`` to update
                 to minimize ``loss``. The default value is None, at this time all parameters
                 will be updated.
             no_grad_set (set, optional): Set of ``Variable`` or ``Variable.name`` that don't need
@@ -882,7 +883,7 @@
             startup_program (Program, optional): :ref:`api_fluid_Program` for
                 initializing parameters in ``parameter_list``. The default value
                 is None, at this time :ref:`api_fluid_default_startup_program` will be used.
-            parameter_list (list, optional): List of ``Variable`` or ``Variable.name`` to update
+            parameter_list (Iterable, optional): Iterable of ``Variable`` or ``Variable.name`` to update
                 to minimize ``loss``. The default value is None, at this time all parameters
                 will be updated.
             no_grad_set (set, optional): Set of ``Variable`` or ``Variable.name`` that don't need
@@ -926,7 +927,7 @@ class SGDOptimizer(Optimizer):
     Parameters:
         learning_rate (float|Variable): The learning rate used to update parameters. \
             Can be a float value or a Variable with one float value as data element.
-        parameter_list (list, optional): List of ``Variable`` names to update to minimize ``loss``. \
+        parameter_list (Iterable, optional): Iterable of ``Variable`` names to update to minimize ``loss``. \
             This parameter is required in dygraph mode. \
             The default value is None in static mode, at this time all parameters will be updated.
         regularization (WeightDecayRegularizer, optional): The strategy of regularization. There are two method: \
@@ -1034,7 +1035,7 @@ class MomentumOptimizer(Optimizer):
         learning_rate (float|Variable): The learning rate used to update parameters. \
             Can be a float value or a Variable with one float value as data element.
         momentum (float): Momentum factor
-        parameter_list (list, optional): List of ``Variable`` names to update to minimize ``loss``. \
+        parameter_list (Iterable, optional): Iterable of ``Variable`` names to update to minimize ``loss``. \
             This parameter is required in dygraph mode. \
             The default value is None in static mode, at this time all parameters will be updated.
         use_nesterov (bool, optional): Enables Nesterov momentum, default is false.
@@ -1182,7 +1183,7 @@ class DGCMomentumOptimizer(Optimizer):
         sparsity (list[float]): Get top important element from gradient tensor, the ratio is (1 - current sparsity). \
             Default is [0.999]. For example, if the sparsity is [0.99, 0.999], \
             the top [1%, 0.1%] important element will be transmitted.
-        parameter_list (list, optional): List of ``Variable`` names to update to minimize ``loss``. \
+        parameter_list (Iterable, optional): Iterable of ``Variable`` names to update to minimize ``loss``. \
             This parameter is required in dygraph mode. \
             The default value is None in static mode, at this time all parameters will be updated.
         use_nesterov (bool): Enables Nesterov momentum. True means use Nesterov. Default is False.
@@ -1580,7 +1581,7 @@ class LarsMomentumOptimizer(Optimizer):
         momentum (float): momentum factor
         lars_coeff (float): Defines how much we trust the layer to change its weights.
         lars_weight_decay (float): Weight decay coefficient for decaying using LARS.
-        parameter_list (list, optional): List of ``Variable`` names to update to minimize ``loss``. \
+        parameter_list (Iterable, optional): Iterable of ``Variable`` names to update to minimize ``loss``. \
             This parameter is required in dygraph mode. \
             The default value is None in static mode, at this time all parameters will be updated.
         regularization (WeightDecayRegularizer, optional): The strategy of regularization. There are two method: \
@@ -1699,7 +1700,7 @@ class AdagradOptimizer(Optimizer):
             It can be a float value or a ``Variable`` with a float type.
         epsilon (float, optional): A small float value for numerical stability.
             The default value is 1e-06.
-        parameter_list (list, optional): List of ``Variable`` names to update to minimize ``loss``. \
+        parameter_list (Iterable, optional): Iterable of ``Variable`` names to update to minimize ``loss``. \
             This parameter is required in dygraph mode. \
             The default value is None in static mode, at this time all parameters will be updated.
         regularization (WeightDecayRegularizer, optional): The strategy of regularization. There are two method: \
@@ -1824,7 +1825,7 @@ class AdamOptimizer(Optimizer):
             The default value is 0.999.
         epsilon (float, optional): A small float value for numerical stability.
             The default value is 1e-08.
-        parameter_list (list, optional): List of ``Variable`` names to update to minimize ``loss``. \
+        parameter_list (Iterable, optional): Iterable of ``Variable`` names to update to minimize ``loss``. \
             This parameter is required in dygraph mode. \
             The default value is None in static mode, at this time all parameters will be updated.
         regularization (WeightDecayRegularizer, optional): The strategy of regularization. There are two method: \
@@ -2090,7 +2091,7 @@ class AdamaxOptimizer(Optimizer):
             The default value is 0.999.
         epsilon (float, optional): A small float value for numerical stability.
             The default value is 1e-08.
-        parameter_list (list, optional): List of ``Variable`` names to update to minimize ``loss``. \
+        parameter_list (Iterable, optional): Iterable of ``Variable`` names to update to minimize ``loss``. \
             This parameter is required in dygraph mode. \
             The default value is None in static mode, at this time all parameters will be updated.
         regularization (WeightDecayRegularizer, optional): The strategy of regularization. There are two method: \
@@ -2265,7 +2266,7 @@ class DpsgdOptimizer(Optimizer):
         clip (float): clipping threshold
         batch_size (float): batch size.
         sigma (float): for gaussian noise.
-        parameter_list (list, optional): List of ``Variable`` names to update to minimize ``loss``. \
+        parameter_list (Iterable, optional): Iterable of ``Variable`` names to update to minimize ``loss``. \
             This parameter is required in dygraph mode. \
             The default value is None in static mode, at this time all parameters will be updated.
     Notes:
@@ -2348,7 +2349,7 @@ class DecayedAdagradOptimizer(Optimizer):
         decay (float, optional): The decay rate. The default value is 0.95.
         epsilon (float, optional): A small float value for numerical stability.
             The default value is 1e-06.
-        parameter_list (list, optional): List of ``Variable`` names to update to minimize ``loss``. \
+        parameter_list (Iterable, optional): Iterable of ``Variable`` names to update to minimize ``loss``. \
             This parameter is required in dygraph mode. \
             The default value is None in static mode, at this time all parameters will be updated.
         regularization (WeightDecayRegularizer, optional): The strategy of regularization. There are two method: \
@@ -2453,7 +2454,7 @@ class AdadeltaOptimizer(Optimizer):
         learning_rate (float|Variable): global learning rate.
         epsilon (float): a small float number for numeric stability. Default 1.0e-6.
         rho (float): a floating point value indicating the decay rate. Default 0.95.
-        parameter_list (list, optional): List of ``Variable`` names to update to minimize ``loss``. \
+        parameter_list (Iterable, optional): Iterable of ``Variable`` names to update to minimize ``loss``. \
             This parameter is required in dygraph mode. \
             The default value is None in static mode, at this time all parameters will be updated.
         regularization (WeightDecayRegularizer, optional): The strategy of regularization. There are two method: \
@@ -2610,7 +2611,7 @@ class RMSPropOptimizer(Optimizer):
             the gradient; if False, by the uncentered second moment. Setting this to
             True may help with training, but is slightly more expensive in terms of
             computation and memory. Defaults to False.
-        parameter_list (list, optional): List of ``Variable`` names to update to minimize ``loss``. \
+        parameter_list (Iterable, optional): Iterable of ``Variable`` names to update to minimize ``loss``. \
            This parameter is required in dygraph mode. \
            The default value is None in static mode, at this time all parameters will be updated.
        regularization (WeightDecayRegularizer, optional): The strategy of regularization. There are two method: \
@@ -2784,7 +2785,7 @@ class FtrlOptimizer(Optimizer):
         l1 (float): L1 regularization strength, default is 0.0.
         l2 (float): L2 regularization strength, default is 0.0.
         lr_power (float): Learning Rate Power, default is -0.5.
-        parameter_list (list, optional): List of ``Variable`` names to update to minimize ``loss``. \
+        parameter_list (Iterable, optional): Iterable of ``Variable`` names to update to minimize ``loss``. \
             This parameter is required in dygraph mode. \
             The default value is None in static mode, at this time all parameters will be updated.
         regularization (WeightDecayRegularizer, optional): The strategy of regularization. There are two method: \
@@ -2932,7 +2933,7 @@ class LambOptimizer(AdamOptimizer):
         beta2 (float, optional): The exponential decay rate for the 2nd moment estimates.
             Default 0.999.
         epsilon (float, optional): A small float value for numerical stability. Default 1e-6.
-        parameter_list (list, optional): List of ``Variable`` names to update to minimize ``loss``. \
+        parameter_list (Iterable, optional): Iterable of ``Variable`` names to update to minimize ``loss``. \
             This parameter is required in dygraph mode. \
             The default value is None in static mode, at this time all parameters will be updated.
         regularization (WeightDecayRegularizer, optional): The strategy of regularization. There are two method: \
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py b/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py
index d3017c31c7..a7783afc5c 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py
@@ -18,6 +18,7 @@ import contextlib
 import unittest
 import numpy as np
 import six
+import itertools
 
 import paddle
 import paddle.fluid as fluid
@@ -699,5 +700,30 @@ class TestImperativeRecomputeOptimizer(TestImperativeOptimizerBase):
         self._check_exception(exception_message)
 
 
+class TestImperativeOptimizerList(unittest.TestCase):
+    def test_parameter_list(self):
+        with fluid.dygraph.guard():
+            linear_1 = Linear(10, 10)
+            linear_2 = Linear(10, 10)
+
+            sgd = SGDOptimizer(
+                1.0,
+                parameter_list=itertools.chain(linear_1.parameters(),
+                                               linear_2.parameters()))
+
+            in_np = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")
+            in_data = fluid.dygraph.to_variable(in_np)
+
+            y = linear_1(in_data)
+            y = linear_2(y)
+            loss = fluid.layers.reduce_mean(y)
+            loss.backward()
+            sgd.minimize(loss)
+
+            self.assertTrue(
+                len(sgd._parameter_list) ==
+                len(linear_1.parameters() + linear_2.parameters()))
+
+
 if __name__ == '__main__':
     unittest.main()
--
GitLab
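
The core of this patch is the two added lines in Optimizer.__init__: the incoming parameter_list is copied into a plain list once (while None stays None, so "update all parameters" still applies in static mode), which means a generator or other one-shot iterator passed by the caller is no longer silently exhausted after its first traversal. Below is a minimal standalone sketch of that failure mode in plain Python; it is not part of the patch, and ToyOptimizer is a hypothetical stand-in for fluid.optimizer.Optimizer.

# Standalone sketch (not part of the patch) of the bug the fix guards against.
# ``ToyOptimizer`` is a hypothetical stand-in for ``fluid.optimizer.Optimizer``.
import itertools


class ToyOptimizer(object):
    def __init__(self, parameter_list=None):
        # Same idea as the fixed __init__: materialize the iterable once,
        # keeping None as None.
        self._parameter_list = list(
            parameter_list) if parameter_list is not None else None

    def step(self):
        # Without the list() copy, a one-shot iterator would already be
        # exhausted here on the second call and nothing would be updated.
        return [p for p in self._parameter_list]


group_1 = ["w1", "b1"]
group_2 = ["w2", "b2"]

# itertools.chain returns a one-shot iterator, just like the chained
# Linear.parameters() calls in the new unit test.
opt = ToyOptimizer(parameter_list=itertools.chain(group_1, group_2))

assert opt.step() == ["w1", "b1", "w2", "b2"]
assert opt.step() == ["w1", "b1", "w2", "b2"]  # still populated on reuse

The new TestImperativeOptimizerList case exercises the same path in dygraph mode: it chains the parameters of two Linear layers with itertools.chain, runs one minimize step, and checks that the materialized _parameter_list contains as many entries as the two layers' parameter lists combined.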