From 629b6c78969179ad79be3eedab429802f60e2375 Mon Sep 17 00:00:00 2001 From: Zhou Wei <52485244+zhouwei25@users.noreply.github.com> Date: Thu, 9 Apr 2020 10:45:04 +0800 Subject: [PATCH] add the prompt message of repeated settings of regularization,test=develop (#23355) --- python/paddle/fluid/optimizer.py | 92 +++++++++++++------ python/paddle/fluid/param_attr.py | 14 ++- python/paddle/fluid/regularizer.py | 91 ++++++++++++++++-- .../fluid/tests/unittests/test_regularizer.py | 32 +++++++ 4 files changed, 187 insertions(+), 42 deletions(-) diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py index 9fdbaf64e1f..6f07c90ccd3 100644 --- a/python/paddle/fluid/optimizer.py +++ b/python/paddle/fluid/optimizer.py @@ -854,8 +854,11 @@ class SGDOptimizer(Optimizer): parameter_list (list, optional): List of ``Variable`` names to update to minimize ``loss``. \ This parameter is required in dygraph mode. \ The default value is None in static mode, at this time all parameters will be updated. - regularization: A Regularizer, such as :ref:`api_fluid_regularizer_L2DecayRegularizer`. \ - Optional, default is None. + regularization (WeightDecayRegularizer, optional): The strategy of regularization. There are two methods: \ + :ref:`api_fluid_regularizer_L1Decay` , :ref:`api_fluid_regularizer_L2Decay` . If a parameter has already set a \ + regularizer using :ref:`api_fluid_ParamAttr` , the regularization setting here in the optimizer will be \ + ignored for this parameter. Otherwise, the regularization setting here in the optimizer will take effect. \ + Default None, meaning there is no regularization. name (str, optional): This parameter is used by developers to print debugging information. \ For details, please refer to :ref:`api_guide_Name`. Default is None. @@ -954,8 +957,11 @@ class MomentumOptimizer(Optimizer): parameter_list (list, optional): List of ``Variable`` names to update to minimize ``loss``. \ This parameter is required in dygraph mode. \ The default value is None in static mode, at this time all parameters will be updated. use_nesterov (bool, optional): Enables Nesterov momentum, default is false. - regularization: A Regularizer, such as :ref:`api_fluid_regularizer_L2DecayRegularizer`. \ - Optional, default is None. + regularization (WeightDecayRegularizer, optional): The strategy of regularization. There are two methods: \ + :ref:`api_fluid_regularizer_L1Decay` , :ref:`api_fluid_regularizer_L2Decay` . If a parameter has already set a \ + regularizer using :ref:`api_fluid_ParamAttr` , the regularization setting here in the optimizer will be \ + ignored for this parameter. Otherwise, the regularization setting here in the optimizer will take effect. \ + Default None, meaning there is no regularization. name (str, optional): This parameter is used by developers to print debugging information. \ For details, please refer to :ref:`api_guide_Name`. Default is None. @@ -1093,8 +1099,11 @@ class DGCMomentumOptimizer(Optimizer): use_nesterov (bool): Enables Nesterov momentum. True means use Nesterov. Default is False. local_grad_clip_norm (float, optional): Local gradient clip norm value. Optional, default is None, represent no need clip. num_trainers (int, optional): The number of training nodes. Optional, default is None. - regularization (WeightDecayRegularizer, optional): A Regularizer, such as \ - :ref:`api_fluid_regularizer_L2DecayRegularizer`. Optional, default is None. + regularization (WeightDecayRegularizer, optional): The strategy of regularization. There are two methods: \ + :ref:`api_fluid_regularizer_L1Decay` , :ref:`api_fluid_regularizer_L2Decay` .
If a parameter has already set a \ + regularizer using :ref:`api_fluid_ParamAttr` , the regularization setting here in the optimizer will be \ + ignored for this parameter. Otherwise, the regularization setting here in the optimizer will take effect. \ + Default None, meaning there is no regularization. name (str, optional): This parameter is used by developers to print debugging information. \ For details, please refer to :ref:`api_guide_Name`. Default is None. @@ -1480,8 +1489,11 @@ class LarsMomentumOptimizer(Optimizer): parameter_list (list, optional): List of ``Variable`` names to update to minimize ``loss``. \ This parameter is required in dygraph mode. \ The default value is None in static mode, at this time all parameters will be updated. - regularization: A Regularizer, such as :ref:`api_fluid_regularizer_L2DecayRegularizer`. - Optional, default is None. + regularization (WeightDecayRegularizer, optional): The strategy of regularization. There are two methods: \ + :ref:`api_fluid_regularizer_L1Decay` , :ref:`api_fluid_regularizer_L2Decay` . If a parameter has already set a \ + regularizer using :ref:`api_fluid_ParamAttr` , the regularization setting here in the optimizer will be \ + ignored for this parameter. Otherwise, the regularization setting here in the optimizer will take effect. \ + Default None, meaning there is no regularization. name (str, optional): This parameter is used by developers to print debugging information. \ For details, please refer to :ref:`api_guide_Name`. Default is None. @@ -1590,8 +1602,11 @@ class AdagradOptimizer(Optimizer): parameter_list (list, optional): List of ``Variable`` names to update to minimize ``loss``. \ This parameter is required in dygraph mode. \ The default value is None in static mode, at this time all parameters will be updated. - regularization (WeightDecayRegularizer, optional): A ``Regularizer``, such as - :ref:`api_fluid_regularizer_L2DecayRegularizer`. The default value is None. + regularization (WeightDecayRegularizer, optional): The strategy of regularization. There are two methods: \ + :ref:`api_fluid_regularizer_L1Decay` , :ref:`api_fluid_regularizer_L2Decay` . If a parameter has already set a \ + regularizer using :ref:`api_fluid_ParamAttr` , the regularization setting here in the optimizer will be \ + ignored for this parameter. Otherwise, the regularization setting here in the optimizer will take effect. \ + Default None, meaning there is no regularization. name (str, optional): Normally there is no need for user to set this property. For more information, please refer to :ref:`api_guide_Name`. The default value is None. @@ -1706,8 +1721,11 @@ class AdamOptimizer(Optimizer): parameter_list (list, optional): List of ``Variable`` names to update to minimize ``loss``. \ This parameter is required in dygraph mode. \ The default value is None in static mode, at this time all parameters will be updated. - regularization (WeightDecayRegularizer, optional): A ``Regularizer``, such as - :ref:`api_fluid_regularizer_L2DecayRegularizer`. The default value is None. + regularization (WeightDecayRegularizer, optional): The strategy of regularization. There are two methods: \ + :ref:`api_fluid_regularizer_L1Decay` , :ref:`api_fluid_regularizer_L2Decay` . If a parameter has already set a \ + regularizer using :ref:`api_fluid_ParamAttr` , the regularization setting here in the optimizer will be \ + ignored for this parameter. Otherwise, the regularization setting here in the optimizer will take effect. \ + Default None, meaning there is no regularization.
name (str, optional): Normally there is no need for user to set this property. For more information, please refer to :ref:`api_guide_Name`. The default value is None. @@ -1963,8 +1981,11 @@ class AdamaxOptimizer(Optimizer): parameter_list (list, optional): List of ``Variable`` names to update to minimize ``loss``. \ This parameter is required in dygraph mode. \ The default value is None in static mode, at this time all parameters will be updated. - regularization (WeightDecayRegularizer, optional): A ``Regularizer``, such as - :ref:`api_fluid_regularizer_L2DecayRegularizer`. The default value is None. + regularization (WeightDecayRegularizer, optional): The strategy of regularization. There are two methods: \ + :ref:`api_fluid_regularizer_L1Decay` , :ref:`api_fluid_regularizer_L2Decay` . If a parameter has already set a \ + regularizer using :ref:`api_fluid_ParamAttr` , the regularization setting here in the optimizer will be \ + ignored for this parameter. Otherwise, the regularization setting here in the optimizer will take effect. \ + Default None, meaning there is no regularization. name (str, optional): Normally there is no need for user to set this property. For more information, please refer to :ref:`api_guide_Name`. The default value is None. @@ -2212,8 +2233,11 @@ class DecayedAdagradOptimizer(Optimizer): parameter_list (list, optional): List of ``Variable`` names to update to minimize ``loss``. \ This parameter is required in dygraph mode. \ The default value is None in static mode, at this time all parameters will be updated. - regularization (WeightDecayRegularizer, optional): A ``Regularizer``, such as - :ref:`api_fluid_regularizer_L2DecayRegularizer`. The default value is None. + regularization (WeightDecayRegularizer, optional): The strategy of regularization. There are two methods: \ + :ref:`api_fluid_regularizer_L1Decay` , :ref:`api_fluid_regularizer_L2Decay` . If a parameter has already set a \ + regularizer using :ref:`api_fluid_ParamAttr` , the regularization setting here in the optimizer will be \ + ignored for this parameter. Otherwise, the regularization setting here in the optimizer will take effect. \ + Default None, meaning there is no regularization. name (str, optional): Normally there is no need for user to set this property. For more information, please refer to :ref:`api_guide_Name`. The default value is None. @@ -2308,9 +2332,11 @@ class AdadeltaOptimizer(Optimizer): parameter_list (list, optional): List of ``Variable`` names to update to minimize ``loss``. \ This parameter is required in dygraph mode. \ The default value is None in static mode, at this time all parameters will be updated. - regularization (WeightDecayRegularizer, optional): A Regularizer, such as - fluid.regularizer.L2DecayRegularizer. Default None, meaning that there is no - regularization. + regularization (WeightDecayRegularizer, optional): The strategy of regularization. There are two methods: \ + :ref:`api_fluid_regularizer_L1Decay` , :ref:`api_fluid_regularizer_L2Decay` . If a parameter has already set a \ + regularizer using :ref:`api_fluid_ParamAttr` , the regularization setting here in the optimizer will be \ + ignored for this parameter. Otherwise, the regularization setting here in the optimizer will take effect. \ + Default None, meaning there is no regularization. name (str, optional): The default value is None. Normally there is no need for user to set this property. For more information, please refer to :ref:`api_guide_Name` .
@@ -2457,8 +2483,11 @@ class RMSPropOptimizer(Optimizer): parameter_list (list, optional): List of ``Variable`` names to update to minimize ``loss``. \ This parameter is required in dygraph mode. \ The default value is None in static mode, at this time all parameters will be updated. - regularization: A Regularizer, such as :ref:`api_fluid_regularizer_L2DecayRegularizer`. \ - Optional, default is None. + regularization (WeightDecayRegularizer, optional): The strategy of regularization. There are two methods: \ + :ref:`api_fluid_regularizer_L1Decay` , :ref:`api_fluid_regularizer_L2Decay` . If a parameter has already set a \ + regularizer using :ref:`api_fluid_ParamAttr` , the regularization setting here in the optimizer will be \ + ignored for this parameter. Otherwise, the regularization setting here in the optimizer will take effect. \ + Default None, meaning there is no regularization. name (str, optional): This parameter is used by developers to print debugging information. \ For details, please refer to :ref:`api_guide_Name`. Default is None. @@ -2622,8 +2651,11 @@ class FtrlOptimizer(Optimizer): parameter_list (list, optional): List of ``Variable`` names to update to minimize ``loss``. \ This parameter is required in dygraph mode. \ The default value is None in static mode, at this time all parameters will be updated. - regularization: A Regularizer, such as :ref:`api_fluid_regularizer_L2DecayRegularizer`. \ - Optional, default is None. + regularization (WeightDecayRegularizer, optional): The strategy of regularization. There are two methods: \ + :ref:`api_fluid_regularizer_L1Decay` , :ref:`api_fluid_regularizer_L2Decay` . If a parameter has already set a \ + regularizer using :ref:`api_fluid_ParamAttr` , the regularization setting here in the optimizer will be \ + ignored for this parameter. Otherwise, the regularization setting here in the optimizer will take effect. \ + Default None, meaning there is no regularization. name (str, optional): This parameter is used by developers to print debugging information. \ For details, please refer to :ref:`api_guide_Name`. Default is None. @@ -2761,8 +2793,11 @@ class LambOptimizer(AdamOptimizer): parameter_list (list, optional): List of ``Variable`` names to update to minimize ``loss``. \ This parameter is required in dygraph mode. \ The default value is None in static mode, at this time all parameters will be updated. - regularization (Regularizer|None): A Regularizer, such as - fluid.regularizer.L1DecayRegularizer. Default None. + regularization (WeightDecayRegularizer, optional): The strategy of regularization. There are two methods: \ + :ref:`api_fluid_regularizer_L1Decay` , :ref:`api_fluid_regularizer_L2Decay` . If a parameter has already set a \ + regularizer using :ref:`api_fluid_ParamAttr` , the regularization setting here in the optimizer will be \ + ignored for this parameter. Otherwise, the regularization setting here in the optimizer will take effect. \ + Default None, meaning there is no regularization. exclude_from_weight_decay_fn (function|None): Exclude a parameter from weight decay when **exclude_from_weight_decay_fn(parameter)** returns true. Default None. @@ -2922,8 +2957,11 @@ class ModelAverage(Optimizer): average_window_rate (float): The calculate ratio of the window length relative to ``Parameter`` update times. min_average_window (int, optional): the minimum size of average window length. The default value is 10000. max_average_window (int, optional): The maximum size of average window length. The default value is 10000.
- regularization (WeightDecayRegularizer, optional): A ``Regularizer``, such as - :ref:`api_fluid_regularizer_L2DecayRegularizer`. The default value is None. + regularization (WeightDecayRegularizer, optional): The strategy of regularization. There are two methods: \ + :ref:`api_fluid_regularizer_L1Decay` , :ref:`api_fluid_regularizer_L2Decay` . If a parameter has already set a \ + regularizer using :ref:`api_fluid_ParamAttr` , the regularization setting here in the optimizer will be \ + ignored for this parameter. Otherwise, the regularization setting here in the optimizer will take effect. \ + Default None, meaning there is no regularization. name (str, optional): Normally there is no need for user to set this property. For more information, please refer to :ref:`api_guide_Name`. The default value is None. diff --git a/python/paddle/fluid/param_attr.py b/python/paddle/fluid/param_attr.py index ab530ae2135..79207f5bb42 100644 --- a/python/paddle/fluid/param_attr.py +++ b/python/paddle/fluid/param_attr.py @@ -47,8 +47,11 @@ class ParamAttr(object): learning_rate (float): The parameter's learning rate. The learning rate when optimize is the global learning rates times the parameter's learning rate times the factor of learning rate scheduler. Default 1.0. - regularizer (WeightDecayRegularizer, optional): Regularization factor. Default None, meaning - there is no regularization. + regularizer (WeightDecayRegularizer, optional): Regularization strategy. There are two methods: + :ref:`api_fluid_regularizer_L1Decay` , :ref:`api_fluid_regularizer_L2Decay` . If a + regularizer is also set in ``optimizer`` (such as :ref:`api_fluid_optimizer_SGDOptimizer` ), + the regularizer setting in the optimizer will be ignored. Default None, meaning there is + no regularization. trainable (bool): Whether this parameter is trainable. Default True. do_model_average (bool): Whether this parameter should do model average when model average is enabled. Default False. @@ -215,9 +218,10 @@ class WeightNormParamAttr(ParamAttr): learning_rate(float32): The parameter's learning rate when optimizer is :math:`global\_lr * parameter\_lr * scheduler\_factor`. Default 1.0. - regularizer(WeightDecayRegularizer): Regularization factor, such as - ``regularizer = fluid.regularizer.L2DecayRegularizer(regularization_coeff=0.1)``. - Default None, meaning that there is no regularization. + regularizer (WeightDecayRegularizer, optional): Regularization strategy. There are two methods: + :ref:`api_fluid_regularizer_L1Decay` , :ref:`api_fluid_regularizer_L2Decay` . If a regularizer + is also set in ``optimizer`` (such as :ref:`api_fluid_optimizer_SGDOptimizer` ), the regularizer + setting in the optimizer will be ignored. Default None, meaning there is no regularization. trainable(bool, optional): Whether this parameter is trainable. Default True. do_model_average(bool, optional): Whether this parameter should do model average. Default False. diff --git a/python/paddle/fluid/regularizer.py b/python/paddle/fluid/regularizer.py index 96b4f16c961..a80cb1eec9c 100644 --- a/python/paddle/fluid/regularizer.py +++ b/python/paddle/fluid/regularizer.py @@ -17,11 +17,15 @@ from __future__ import print_function from . import framework from .framework import in_dygraph_mode, _varbase_creator from .
import core +import logging __all__ = ['L1Decay', 'L2Decay', 'L1DecayRegularizer', 'L2DecayRegularizer'] -def _create_regularization_of_grad(param, grad, regularization=None): +def _create_regularization_of_grad(param, + grad, + regularization=None, + _repeat_regularizer=None): """ Create and add backward regularization Operators Function helper of append_regularization_ops. @@ -31,6 +35,8 @@ def _create_regularization_of_grad(param, grad, regularization=None): return grad regularization_term = None if param.regularizer is not None: + if regularization is not None: + _repeat_regularizer.append(param.name) # Add variable for regularization term in grad block regularization_term = param.regularizer(param, grad, grad.block) elif regularization is not None: @@ -83,18 +89,25 @@ def append_regularization_ops(parameters_and_grads, regularization=None): Exception: Unknown regularization type """ params_and_grads = [] + _repeat_regularizer = [] if in_dygraph_mode(): for param, grad in parameters_and_grads: - new_grad = _create_regularization_of_grad(param, grad, - regularization) + new_grad = _create_regularization_of_grad( + param, grad, regularization, _repeat_regularizer) params_and_grads.append((param, new_grad)) else: with framework.name_scope('regularization'): for param, grad in parameters_and_grads: with param.block.program._optimized_guard([param, grad]): - new_grad = _create_regularization_of_grad(param, grad, - regularization) + new_grad = _create_regularization_of_grad( + param, grad, regularization, _repeat_regularizer) params_and_grads.append((param, new_grad)) + if len(_repeat_regularizer) > 0: + param_name_strlist = ", ".join(_repeat_regularizer) + logging.info( + "Regularization of [%s] have been set by ParamAttr or WeightNormParamAttr already. " + "So, the Regularization of Optimizer will not take effect for these parameters!" + % param_name_strlist) return params_and_grads @@ -127,6 +140,11 @@ class L2DecayRegularizer(WeightDecayRegularizer): """ Implement the L2 Weight Decay Regularization, which helps to prevent the model over-fitting. + It can be set in :ref:`api_fluid_ParamAttr` or ``optimizer`` (such as :ref:`api_fluid_optimizer_SGDOptimizer` ). + When set in ``ParamAttr`` , it only takes effect for trainable parameters in this layer. When set in + ``optimizer`` , it takes effect for all trainable parameters. When set together, ``ParamAttr`` has + higher priority than ``optimizer`` . + In the implementation, the formula of L2 Weight Decay Regularization is as follows: .. math:: @@ -134,12 +152,12 @@ class L2DecayRegularizer(WeightDecayRegularizer): L2WeightDecay = reg\_coeff * parameter Args: - regularization_coeff(float, optional): regularization coeff. - Default:0.0 + regularization_coeff(float, optional): regularization coeff. Default:0.0 Examples: .. 
code-block:: python + # Example1: set Regularizer in optimizer import paddle.fluid as fluid main_prog = fluid.Program() @@ -153,9 +171,33 @@ class L2DecayRegularizer(WeightDecayRegularizer): avg_loss = fluid.layers.mean(loss) optimizer = fluid.optimizer.Adagrad( learning_rate=1e-4, - regularization=fluid.regularizer.L2DecayRegularizer( + regularization=fluid.regularizer.L2Decay( regularization_coeff=0.1)) optimizer.minimize(avg_loss) + + + # Example2: set Regularizer both in ParamAttr and optimizer + import paddle.fluid as fluid + + l1 = fluid.regularizer.L1Decay(regularization_coeff=0.1) + l2 = fluid.regularizer.L2Decay(regularization_coeff=0.1) + x = fluid.layers.uniform_random([3,4]) + + # set L1 regularization in fluid.ParamAttr + w_param = fluid.ParamAttr(regularizer=l1) + hidden1 = fluid.layers.fc(x, 8, param_attr=w_param) # fc_0.w_0(L1), fc_0.b_0 + hidden2 = fluid.layers.fc(hidden1, 16, param_attr=w_param) # fc_1.w_0(L1), fc_1.b_0 + predict = fluid.layers.fc(hidden2, 32) # fc_3.w_0, fc_3.b_0 + avg_loss = fluid.layers.mean(predict) + + # set L2 regularization in optimizer + optimizer = fluid.optimizer.SGD(learning_rate=1e-4, regularization=l2) + optimizer.minimize(avg_loss) + + # it will Print Message: + # Regularization of [fc_0.w_0, fc_1.w_0] have been set by ParamAttr or WeightNormParamAttr already. + # So, the Regularization of Optimizer will not take effect for these parameters! + """ def __init__(self, regularization_coeff=0.0): @@ -205,6 +247,11 @@ class L1DecayRegularizer(WeightDecayRegularizer): """ Implement the L1 Weight Decay Regularization, which encourages the weights to be sparse. + It can be set in :ref:`api_fluid_ParamAttr` or ``optimizer`` (such as :ref:`api_fluid_optimizer_SGDOptimizer` ). + When set in ``ParamAttr`` , it only takes effect for trainable parameters in this layer. When set in + ``optimizer`` , it takes effect for all trainable parameters. When set together, ``ParamAttr`` has + higher priority than ``optimizer`` . + In the implementation, the formula of L1 Weight Decay Regularization is as follows: .. math:: @@ -212,12 +259,12 @@ class L1DecayRegularizer(WeightDecayRegularizer): L1WeightDecay = reg\_coeff * sign(parameter) Args: - regularization_coeff(float, optional): regularization coeff. - Default:0.0. + regularization_coeff(float, optional): regularization coeff. Default:0.0. Examples: .. 
code-block:: python + # Example1: set Regularizer in optimizer import paddle.fluid as fluid main_prog = fluid.Program() @@ -234,6 +281,30 @@ class L1DecayRegularizer(WeightDecayRegularizer): regularization=fluid.regularizer.L1DecayRegularizer( regularization_coeff=0.1)) optimizer.minimize(avg_loss) + + + # Example2: set Regularizer both in ParamAttr and optimizer + import paddle.fluid as fluid + + l1 = fluid.regularizer.L1Decay(regularization_coeff=0.1) + l2 = fluid.regularizer.L2Decay(regularization_coeff=0.1) + x = fluid.layers.uniform_random([3,4]) + + # set L1 regularization in fluid.ParamAttr + w_param = fluid.ParamAttr(regularizer=l1) + hidden1 = fluid.layers.fc(x, 8, param_attr=w_param) # fc_0.w_0(L1), fc_0.b_0 + hidden2 = fluid.layers.fc(hidden1, 16, param_attr=w_param) # fc_1.w_0(L1), fc_1.b_0 + predict = fluid.layers.fc(hidden2, 32) # fc_3.w_0, fc_3.b_0 + avg_loss = fluid.layers.mean(predict) + + # set L2 regularization in optimizer + optimizer = fluid.optimizer.SGD(learning_rate=1e-4, regularization=l2) + optimizer.minimize(avg_loss) + + # it will Print Message: + # Regularization of [fc_0.w_0, fc_1.w_0] have been set by ParamAttr or WeightNormParamAttr already. + # So, the Regularization of Optimizer will not take effect for these parameters! + """ def __init__(self, regularization_coeff=0.0): diff --git a/python/paddle/fluid/tests/unittests/test_regularizer.py b/python/paddle/fluid/tests/unittests/test_regularizer.py index 62994eec7e7..a6f5018a36a 100644 --- a/python/paddle/fluid/tests/unittests/test_regularizer.py +++ b/python/paddle/fluid/tests/unittests/test_regularizer.py @@ -230,6 +230,38 @@ class TestRegularizer(unittest.TestCase): b=dense_sparse_p_sum[1][i], rtol=5e-5) + def test_repeated_regularization(self): + with fluid.dygraph.guard(): + input = fluid.dygraph.to_variable( + np.random.randn(3, 5).astype('float32')) + fluid.default_main_program().random_seed = 1 + l1 = fluid.regularizer.L1Decay(regularization_coeff=0.1) + fc_param_attr = fluid.ParamAttr(regularizer=l1) + linear1 = fluid.dygraph.Linear( + 5, 2, param_attr=fc_param_attr, bias_attr=fc_param_attr) + linear2 = fluid.dygraph.Linear( + 5, 2, param_attr=fc_param_attr, bias_attr=fc_param_attr) + + loss1 = linear1(input) + loss1.backward() + # set l2 regularizer in optimizer, but l1 in fluid.ParamAttr + l2 = fluid.regularizer.L2Decay(regularization_coeff=0.01) + fluid.optimizer.SGD(parameter_list=linear1.parameters(), + learning_rate=1e-2, + regularization=l2).minimize(loss1) + # only set l1 in fluid.ParamAttr + loss2 = linear2(input) + loss2.backward() + fluid.optimizer.SGD(parameter_list=linear2.parameters(), + learning_rate=1e-2).minimize(loss2) + # l1 should be applied to both, so their parameters should stay the same + self.assertTrue( + np.allclose(linear1.weight.numpy(), linear2.weight.numpy()), "weight should use the regularization in fluid.ParamAttr!") + self.assertTrue( + np.allclose(linear1.bias.numpy(), linear2.bias.numpy()), "bias should use the regularization in fluid.ParamAttr!") + if __name__ == '__main__': unittest.main() -- GitLab
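A minimal sketch, not taken from the patch itself, of how the precedence rule described in the docstrings above plays out end to end in static-graph mode, using the same fluid 1.x API shown throughout the diff. The parameter names fc_0.w_0 and fc_0.b_0 assume Paddle's default auto-naming, and the logging.basicConfig call is only there so the logging.info prompt added by this patch becomes visible.

.. code-block:: python

    import logging
    import paddle.fluid as fluid

    logging.basicConfig(level=logging.INFO)  # surface the logging.info prompt

    l1 = fluid.regularizer.L1Decay(regularization_coeff=0.1)
    l2 = fluid.regularizer.L2Decay(regularization_coeff=0.01)

    x = fluid.layers.uniform_random([3, 4])
    # fc_0.w_0 carries its own L1 regularizer via ParamAttr; fc_0.b_0 does not.
    hidden = fluid.layers.fc(x, 8, param_attr=fluid.ParamAttr(regularizer=l1))
    avg_loss = fluid.layers.mean(hidden)

    # The optimizer-level L2 regularizer applies only to parameters without a
    # per-parameter regularizer (here fc_0.b_0). fc_0.w_0 keeps its L1 setting,
    # and minimize() emits the prompt message introduced by this patch.
    sgd = fluid.optimizer.SGD(learning_rate=1e-2, regularization=l2)
    sgd.minimize(avg_loss)

    # Inspect which parameters carry a per-parameter regularizer.
    for param in fluid.default_main_program().global_block().all_parameters():
        print(param.name, type(param.regularizer).__name__)

In dygraph mode the same precedence is exercised by the new test_repeated_regularization test above.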