Unverified commit 862bfa91, authored by Zhou Wei, committed by GitHub

[cherry-pick2.0] Avoid logging.info being printed many times in dygraph_mode, test=develop (#23930)

* Avoid logging.info being printed many times in dygraph_mode, test=develop

* Avoid logging.info being printed many times in dygraph_mode, test=develop
Parent 1d8a042e
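To summarize the intent of the patch, a minimal self-contained sketch follows (the class name OnceWarningOptimizer and its methods are hypothetical, not Paddle APIs): the duplicate-regularizer notice is emitted once when the optimizer is constructed, instead of inside the per-step regularization pass that dygraph mode invokes on every iteration.

import logging

class OnceWarningOptimizer(object):
    """Hypothetical sketch: emit the duplicate-regularizer notice once,
    at construction time, rather than on every training step."""

    def __init__(self, parameter_list, regularization=None):
        self._parameter_list = parameter_list
        self._regularization = regularization
        if regularization is not None:
            for param in self._parameter_list:
                # A parameter that already carries its own regularizer wins over
                # the optimizer-level one, so log a single notice and stop scanning.
                if getattr(param, 'regularizer', None) is not None:
                    logging.info(
                        "A Parameter already has a regularizer set; the "
                        "optimizer-level Regularization[%s] will only be applied "
                        "to the other Parameters." % str(regularization))
                    break

    def apply_regularization(self, parameters_and_grads):
        # Called every iteration in dygraph mode; it no longer logs anything,
        # so the notice cannot repeat per step.
        return [(param, grad) for param, grad in parameters_and_grads]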
......@@ -15,6 +15,7 @@
from __future__ import print_function
import numpy as np
import logging
from collections import defaultdict
from paddle.fluid.distribute_lookup_table import find_distributed_lookup_table
......@@ -81,6 +82,14 @@ class Optimizer(object):
raise AttributeError(
"parameter_list argument given to the Optimizer should not be None in dygraph mode."
)
if regularization is not None:
for param in self._parameter_list:
if param.regularizer is not None:
logging.info(
"If regularizer of a Parameter has been set by 'fluid.ParamAttr' or 'fluid.WeightNormParamAttr' already. "
"The Regularization[%s] in Optimizer will not take effect, and it will only be applied to other Parameters!"
% regularization.__str__())
break
else:
if not isinstance(learning_rate, float) and \
not isinstance(learning_rate, framework.Variable):
......
......@@ -13,19 +13,16 @@
# limitations under the License.
from __future__ import print_function
import logging
from . import framework
from .framework import in_dygraph_mode, _varbase_creator
from . import core
import logging
__all__ = ['L1Decay', 'L2Decay', 'L1DecayRegularizer', 'L2DecayRegularizer']
def _create_regularization_of_grad(param,
grad,
regularization=None,
_repeat_regularizer=None):
def _create_regularization_of_grad(param, grad, regularization=None):
""" Create and add backward regularization Operators
Function helper of append_regularization_ops.
......@@ -35,8 +32,6 @@ def _create_regularization_of_grad(param,
return grad
regularization_term = None
if param.regularizer is not None:
if regularization is not None:
_repeat_regularizer.append(param.name)
# Add variable for regularization term in grad block
regularization_term = param.regularizer(param, grad, grad.block)
elif regularization is not None:
......@@ -89,25 +84,25 @@ def append_regularization_ops(parameters_and_grads, regularization=None):
Exception: Unknown regularization type
"""
params_and_grads = []
_repeat_regularizer = []
if in_dygraph_mode():
for param, grad in parameters_and_grads:
new_grad = _create_regularization_of_grad(
param, grad, regularization, _repeat_regularizer)
new_grad = _create_regularization_of_grad(param, grad,
regularization)
params_and_grads.append((param, new_grad))
else:
repeate_regularizer = False
with framework.name_scope('regularization'):
for param, grad in parameters_and_grads:
if not repeate_regularizer and param.regularizer is not None and regularization is not None:
repeate_regularizer = True
logging.info(
"If regularizer of a Parameter has been set by 'fluid.ParamAttr' or 'fluid.WeightNormParamAttr' already. "
"The Regularization[%s] in Optimizer will not take effect, and it will only be applied to other Parameters!"
% regularization.__str__())
with param.block.program._optimized_guard([param, grad]):
new_grad = _create_regularization_of_grad(
param, grad, regularization, _repeat_regularizer)
new_grad = _create_regularization_of_grad(param, grad,
regularization)
params_and_grads.append((param, new_grad))
if len(_repeat_regularizer) > 0:
param_name_strlist = ", ".join(_repeat_regularizer)
logging.info(
"Regularization of [%s] have been set by ParamAttr or WeightNormParamAttr already. "
"So, the Regularization of Optimizer will not take effect for these parameters!"
% param_name_strlist)
return params_and_grads
......
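In the static-graph branch the same once-only behavior comes from a local boolean flag that is flipped after the first warning. A rough sketch of that pattern under simplified assumptions (Param below is a hypothetical stand-in for a Paddle Parameter, not the real class):

import logging
from collections import namedtuple

# Hypothetical stand-in for a parameter that may carry its own regularizer.
Param = namedtuple('Param', ['name', 'regularizer'])

def append_regularization(params_and_grads, regularization=None):
    warned = False  # flipped after the first duplicate-regularizer notice
    result = []
    for param, grad in params_and_grads:
        if not warned and param.regularizer is not None and regularization is not None:
            warned = True
            logging.info(
                "Regularizer already set via ParamAttr/WeightNormParamAttr; the "
                "optimizer-level Regularization[%s] will not take effect for "
                "those parameters." % str(regularization))
        # The per-parameter regularizer, when present, is the one that is applied.
        result.append((param, grad))
    return result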
......@@ -231,12 +231,20 @@ class TestRegularizer(unittest.TestCase):
rtol=5e-5)
def test_repeated_regularization(self):
l1 = fluid.regularizer.L1Decay(regularization_coeff=0.1)
l2 = fluid.regularizer.L2Decay(regularization_coeff=0.01)
fc_param_attr = fluid.ParamAttr(regularizer=l1)
with fluid.program_guard(fluid.Program(), fluid.Program()):
x = fluid.layers.uniform_random([2, 2, 3])
out = fluid.layers.fc(x, 5, param_attr=fc_param_attr)
loss = fluid.layers.reduce_sum(out)
sgd = fluid.optimizer.SGD(learning_rate=0.1, regularization=l2)
sgd.minimize(loss)
with fluid.dygraph.guard():
input = fluid.dygraph.to_variable(
np.random.randn(3, 5).astype('float32'))
fluid.default_main_program().random_seed = 1
l1 = fluid.regularizer.L1Decay(regularization_coeff=0.1)
fc_param_attr = fluid.ParamAttr(regularizer=l1)
linear1 = fluid.dygraph.Linear(
5, 2, param_attr=fc_param_attr, bias_attr=fc_param_attr)
linear2 = fluid.dygraph.Linear(
......@@ -245,7 +253,7 @@ class TestRegularizer(unittest.TestCase):
loss1 = linear1(input)
loss1.backward()
# set l2 regularizer in optimizer, but l1 in fluid.ParamAttr
l2 = fluid.regularizer.L2Decay(regularization_coeff=0.01)
fluid.optimizer.SGD(parameter_list=linear1.parameters(),
learning_rate=1e-2,
regularization=l2).minimize(loss1)
......@@ -259,7 +267,7 @@ class TestRegularizer(unittest.TestCase):
np.allclose(linear1.weight.numpy(), linear2.weight.numpy()),
"weight should use the regularization in fluid.ParamAttr!")
self.assertTrue(
np.allclose(linear1.bias.numpy(), linear1.bias.numpy()),
np.allclose(linear1.bias.numpy(), linear2.bias.numpy()),
"bias should use the regularization in fluid.ParamAttr!")
......