diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py
index 38ffbc74b16bdb9c2ba6350b67ab3deb843d2f75..2934ea5f9a58539643017177c32cc58a7a8ba905 100644
--- a/python/paddle/fluid/optimizer.py
+++ b/python/paddle/fluid/optimizer.py
@@ -15,6 +15,7 @@
 from __future__ import print_function
 
 import numpy as np
+import logging
 from collections import defaultdict
 
 from paddle.fluid.distribute_lookup_table import find_distributed_lookup_table
@@ -81,6 +82,14 @@ class Optimizer(object):
                 raise AttributeError(
                     "parameter_list argument given to the Optimizer should not be None in dygraph mode."
                 )
+            if regularization is not None:
+                for param in self._parameter_list:
+                    if param.regularizer is not None:
+                        logging.info(
+                            "If regularizer of a Parameter has been set by 'fluid.ParamAttr' or 'fluid.WeightNormParamAttr' already. "
+                            "The Regularization[%s] in Optimizer will not take effect, and it will only be applied to other Parameters!"
+                            % regularization.__str__())
+                        break
         else:
             if not isinstance(learning_rate, float) and \
                     not isinstance(learning_rate, framework.Variable):
diff --git a/python/paddle/fluid/regularizer.py b/python/paddle/fluid/regularizer.py
index a80cb1eec9c63a6e52fdbb9bc786cab7869a03b2..9fe24ec2c9d87d1c82f8a3fbd771c714ad376aad 100644
--- a/python/paddle/fluid/regularizer.py
+++ b/python/paddle/fluid/regularizer.py
@@ -13,19 +13,16 @@
 # limitations under the License.
 
 from __future__ import print_function
+import logging
 
 from . import framework
 from .framework import in_dygraph_mode, _varbase_creator
 from . import core
-import logging
 
 __all__ = ['L1Decay', 'L2Decay', 'L1DecayRegularizer', 'L2DecayRegularizer']
 
 
-def _create_regularization_of_grad(param,
-                                   grad,
-                                   regularization=None,
-                                   _repeat_regularizer=None):
+def _create_regularization_of_grad(param, grad, regularization=None):
     """ Create and add backward regularization Operators
 
     Function helper of append_regularization_ops.
@@ -35,8 +32,6 @@ def _create_regularization_of_grad(param,
         return grad
     regularization_term = None
     if param.regularizer is not None:
-        if regularization is not None:
-            _repeat_regularizer.append(param.name)
         # Add variable for regularization term in grad block
         regularization_term = param.regularizer(param, grad, grad.block)
     elif regularization is not None:
@@ -89,25 +84,25 @@
         Exception: Unknown regularization type
     """
     params_and_grads = []
-    _repeat_regularizer = []
     if in_dygraph_mode():
         for param, grad in parameters_and_grads:
-            new_grad = _create_regularization_of_grad(
-                param, grad, regularization, _repeat_regularizer)
+            new_grad = _create_regularization_of_grad(param, grad,
+                                                      regularization)
             params_and_grads.append((param, new_grad))
     else:
+        repeate_regularizer = False
         with framework.name_scope('regularization'):
             for param, grad in parameters_and_grads:
+                if not repeate_regularizer and param.regularizer is not None and regularization is not None:
+                    repeate_regularizer = True
+                    logging.info(
+                        "If regularizer of a Parameter has been set by 'fluid.ParamAttr' or 'fluid.WeightNormParamAttr' already. "
+                        "The Regularization[%s] in Optimizer will not take effect, and it will only be applied to other Parameters!"
+                        % regularization.__str__())
                 with param.block.program._optimized_guard([param, grad]):
-                    new_grad = _create_regularization_of_grad(
-                        param, grad, regularization, _repeat_regularizer)
+                    new_grad = _create_regularization_of_grad(param, grad,
+                                                              regularization)
                     params_and_grads.append((param, new_grad))
-        if len(_repeat_regularizer) > 0:
-            param_name_strlist = ", ".join(_repeat_regularizer)
-            logging.info(
-                "Regularization of [%s] have been set by ParamAttr or WeightNormParamAttr already. "
-                "So, the Regularization of Optimizer will not take effect for these parameters!"
-                % param_name_strlist)
     return params_and_grads
 
 
diff --git a/python/paddle/fluid/tests/unittests/test_regularizer.py b/python/paddle/fluid/tests/unittests/test_regularizer.py
index a6f5018a36ad5cb9d7813b0052085b4f89f8e71e..58b407f8bc1f41301a068f0b85f4c4e9860a45ff 100644
--- a/python/paddle/fluid/tests/unittests/test_regularizer.py
+++ b/python/paddle/fluid/tests/unittests/test_regularizer.py
@@ -231,12 +231,20 @@ class TestRegularizer(unittest.TestCase):
             rtol=5e-5)
 
     def test_repeated_regularization(self):
+        l1 = fluid.regularizer.L1Decay(regularization_coeff=0.1)
+        l2 = fluid.regularizer.L2Decay(regularization_coeff=0.01)
+        fc_param_attr = fluid.ParamAttr(regularizer=l1)
+        with fluid.program_guard(fluid.Program(), fluid.Program()):
+            x = fluid.layers.uniform_random([2, 2, 3])
+            out = fluid.layers.fc(x, 5, param_attr=fc_param_attr)
+            loss = fluid.layers.reduce_sum(out)
+            sgd = fluid.optimizer.SGD(learning_rate=0.1, regularization=l2)
+            sgd.minimize(loss)
         with fluid.dygraph.guard():
             input = fluid.dygraph.to_variable(
                 np.random.randn(3, 5).astype('float32'))
             fluid.default_main_program().random_seed = 1
-            l1 = fluid.regularizer.L1Decay(regularization_coeff=0.1)
-            fc_param_attr = fluid.ParamAttr(regularizer=l1)
+
             linear1 = fluid.dygraph.Linear(
                 5, 2, param_attr=fc_param_attr, bias_attr=fc_param_attr)
             linear2 = fluid.dygraph.Linear(
@@ -245,7 +253,7 @@ class TestRegularizer(unittest.TestCase):
             loss1 = linear1(input)
             loss1.backward()
             # set l2 regularizer in optimizer, but l1 in fluid.ParamAttr
-            l2 = fluid.regularizer.L2Decay(regularization_coeff=0.01)
+
             fluid.optimizer.SGD(parameter_list=linear1.parameters(),
                                 learning_rate=1e-2,
                                 regularization=l2).minimize(loss1)
@@ -259,7 +267,7 @@ class TestRegularizer(unittest.TestCase):
                 np.allclose(linear1.weight.numpy(), linear2.weight.numpy()),
                 "weight should use the regularization in fluid.ParamAttr!")
             self.assertTrue(
-                np.allclose(linear1.bias.numpy(), linear1.bias.numpy()),
+                np.allclose(linear1.bias.numpy(), linear2.bias.numpy()),
                 "bias should use the regularization in fluid.ParamAttr!")
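
Behavior note (not part of the diff): with this change, a regularizer attached to a Parameter through fluid.ParamAttr takes precedence, and the optimizer-level regularization is skipped for that Parameter with a logging.info notice, while still applying to Parameters that have no regularizer of their own. Below is a minimal dygraph sketch mirroring the test_repeated_regularization case above; it assumes the paddle.fluid 1.x API used in this diff and is only an illustration, not code from the patch.

# Illustration only, based on the test in this diff; assumes paddle.fluid 1.x.
import numpy as np
import paddle.fluid as fluid

l1 = fluid.regularizer.L1Decay(regularization_coeff=0.1)
l2 = fluid.regularizer.L2Decay(regularization_coeff=0.01)

with fluid.dygraph.guard():
    x = fluid.dygraph.to_variable(np.random.randn(3, 5).astype('float32'))
    # weight and bias of `linear` carry the L1 regularizer from ParamAttr ...
    linear = fluid.dygraph.Linear(
        5, 2,
        param_attr=fluid.ParamAttr(regularizer=l1),
        bias_attr=fluid.ParamAttr(regularizer=l1))
    loss = fluid.layers.reduce_sum(linear(x))
    loss.backward()
    # ... so the L2 regularization passed to the optimizer is not applied to
    # them; the optimizer only emits a logging.info notice about the repeated
    # regularizer and applies L2 to any remaining unregularized Parameters.
    fluid.optimizer.SGD(
        learning_rate=1e-2,
        parameter_list=linear.parameters(),
        regularization=l2).minimize(loss)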