Unverified · Commit 862bfa91 authored by Zhou Wei, committed by GitHub

[cherry-pick2.0] Avoid logging.info being printed many times in dygraph_mode, test=develop (#23930)

* Avoid logging.info being printed many times in dygraph_mode, test=develop

* Avoid logging.info being printed many times in dygraph_mode, test=develop

Parent 1d8a042e
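A minimal sketch of the dygraph-mode scenario this patch targets (an assumed setup mirroring the updated unit test below, on a fluid-era PaddlePaddle build with INFO-level logging): the L1 regularizer attached through fluid.ParamAttr takes precedence, and the optimizer's L2 regularization now produces a single logging.info notice when the optimizer is constructed, instead of one message per parameter on every minimize() call.

import logging
import numpy as np
import paddle.fluid as fluid

logging.basicConfig(level=logging.INFO)

# L1 decay is attached to the parameters via ParamAttr ...
l1 = fluid.regularizer.L1Decay(regularization_coeff=0.1)
fc_param_attr = fluid.ParamAttr(regularizer=l1)
# ... while L2 decay is passed to the optimizer; the ParamAttr one wins.
l2 = fluid.regularizer.L2Decay(regularization_coeff=0.01)

with fluid.dygraph.guard():
    linear = fluid.dygraph.Linear(
        5, 2, param_attr=fc_param_attr, bias_attr=fc_param_attr)
    # With this patch the notice is logged once, right here, rather than
    # repeatedly inside every call that appends regularization ops.
    sgd = fluid.optimizer.SGD(parameter_list=linear.parameters(),
                              learning_rate=1e-2,
                              regularization=l2)
    x = fluid.dygraph.to_variable(np.random.randn(3, 5).astype('float32'))
    loss = fluid.layers.reduce_sum(linear(x))
    loss.backward()
    sgd.minimize(loss)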
...
@@ -15,6 +15,7 @@
 from __future__ import print_function

 import numpy as np
+import logging
 from collections import defaultdict

 from paddle.fluid.distribute_lookup_table import find_distributed_lookup_table
@@ -81,6 +82,14 @@ class Optimizer(object):
                 raise AttributeError(
                     "parameter_list argument given to the Optimizer should not be None in dygraph mode."
                 )
+            if regularization is not None:
+                for param in self._parameter_list:
+                    if param.regularizer is not None:
+                        logging.info(
+                            "If regularizer of a Parameter has been set by 'fluid.ParamAttr' or 'fluid.WeightNormParamAttr' already. "
+                            "The Regularization[%s] in Optimizer will not take effect, and it will only be applied to other Parameters!"
+                            % regularization.__str__())
+                        break
         else:
             if not isinstance(learning_rate, float) and \
                     not isinstance(learning_rate, framework.Variable):
...
@@ -13,19 +13,16 @@
 # limitations under the License.

 from __future__ import print_function
+import logging

 from . import framework
 from .framework import in_dygraph_mode, _varbase_creator
 from . import core
-import logging

 __all__ = ['L1Decay', 'L2Decay', 'L1DecayRegularizer', 'L2DecayRegularizer']


-def _create_regularization_of_grad(param,
-                                   grad,
-                                   regularization=None,
-                                   _repeat_regularizer=None):
+def _create_regularization_of_grad(param, grad, regularization=None):
     """ Create and add backward regularization Operators

     Function helper of append_regularization_ops.
@@ -35,8 +32,6 @@ def _create_regularization_of_grad(param,
         return grad
     regularization_term = None
     if param.regularizer is not None:
-        if regularization is not None:
-            _repeat_regularizer.append(param.name)
         # Add variable for regularization term in grad block
         regularization_term = param.regularizer(param, grad, grad.block)
     elif regularization is not None:
@@ -89,25 +84,25 @@ def append_regularization_ops(parameters_and_grads, regularization=None):
         Exception: Unknown regularization type
     """
     params_and_grads = []
-    _repeat_regularizer = []
     if in_dygraph_mode():
         for param, grad in parameters_and_grads:
-            new_grad = _create_regularization_of_grad(
-                param, grad, regularization, _repeat_regularizer)
+            new_grad = _create_regularization_of_grad(param, grad,
+                                                      regularization)
             params_and_grads.append((param, new_grad))
     else:
+        repeate_regularizer = False
         with framework.name_scope('regularization'):
             for param, grad in parameters_and_grads:
+                if not repeate_regularizer and param.regularizer is not None and regularization is not None:
+                    repeate_regularizer = True
+                    logging.info(
+                        "If regularizer of a Parameter has been set by 'fluid.ParamAttr' or 'fluid.WeightNormParamAttr' already. "
+                        "The Regularization[%s] in Optimizer will not take effect, and it will only be applied to other Parameters!"
+                        % regularization.__str__())
                 with param.block.program._optimized_guard([param, grad]):
-                    new_grad = _create_regularization_of_grad(
-                        param, grad, regularization, _repeat_regularizer)
+                    new_grad = _create_regularization_of_grad(param, grad,
+                                                              regularization)
                     params_and_grads.append((param, new_grad))
-    if len(_repeat_regularizer) > 0:
-        param_name_strlist = ", ".join(_repeat_regularizer)
-        logging.info(
-            "Regularization of [%s] have been set by ParamAttr or WeightNormParamAttr already. "
-            "So, the Regularization of Optimizer will not take effect for these parameters!"
-            % param_name_strlist)
     return params_and_grads
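On the static-graph side, the notice is now emitted at most once per append_regularization_ops call, guarded by the repeate_regularizer flag above. A minimal sketch that exercises this path (an assumed setup, mirroring the static-graph case added to the test below; fluid-era build, INFO-level logging):

import logging
import paddle.fluid as fluid

logging.basicConfig(level=logging.INFO)

l1 = fluid.regularizer.L1Decay(regularization_coeff=0.1)
l2 = fluid.regularizer.L2Decay(regularization_coeff=0.01)
fc_param_attr = fluid.ParamAttr(regularizer=l1)

with fluid.program_guard(fluid.Program(), fluid.Program()):
    x = fluid.layers.uniform_random([2, 2, 3])
    out = fluid.layers.fc(x, 5, param_attr=fc_param_attr)
    loss = fluid.layers.reduce_sum(out)
    sgd = fluid.optimizer.SGD(learning_rate=0.1, regularization=l2)
    # minimize() appends the regularization ops; the notice is logged once
    # for all parameters whose ParamAttr regularizer takes precedence.
    sgd.minimize(loss)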
...
@@ -231,12 +231,20 @@ class TestRegularizer(unittest.TestCase):
            rtol=5e-5)

     def test_repeated_regularization(self):
+        l1 = fluid.regularizer.L1Decay(regularization_coeff=0.1)
+        l2 = fluid.regularizer.L2Decay(regularization_coeff=0.01)
+        fc_param_attr = fluid.ParamAttr(regularizer=l1)
+        with fluid.program_guard(fluid.Program(), fluid.Program()):
+            x = fluid.layers.uniform_random([2, 2, 3])
+            out = fluid.layers.fc(x, 5, param_attr=fc_param_attr)
+            loss = fluid.layers.reduce_sum(out)
+            sgd = fluid.optimizer.SGD(learning_rate=0.1, regularization=l2)
+            sgd.minimize(loss)
+
         with fluid.dygraph.guard():
             input = fluid.dygraph.to_variable(
                 np.random.randn(3, 5).astype('float32'))
             fluid.default_main_program().random_seed = 1
-            l1 = fluid.regularizer.L1Decay(regularization_coeff=0.1)
-            fc_param_attr = fluid.ParamAttr(regularizer=l1)
             linear1 = fluid.dygraph.Linear(
                 5, 2, param_attr=fc_param_attr, bias_attr=fc_param_attr)
             linear2 = fluid.dygraph.Linear(
@@ -245,7 +253,7 @@ class TestRegularizer(unittest.TestCase):
             loss1 = linear1(input)
             loss1.backward()
             # set l2 regularizer in optimizer, but l1 in fluid.ParamAttr
-            l2 = fluid.regularizer.L2Decay(regularization_coeff=0.01)
+
             fluid.optimizer.SGD(parameter_list=linear1.parameters(),
                                 learning_rate=1e-2,
                                 regularization=l2).minimize(loss1)
@@ -259,7 +267,7 @@ class TestRegularizer(unittest.TestCase):
             np.allclose(linear1.weight.numpy(), linear2.weight.numpy()),
             "weight should use the regularization in fluid.ParamAttr!")
         self.assertTrue(
-            np.allclose(linear1.bias.numpy(), linear1.bias.numpy()),
+            np.allclose(linear1.bias.numpy(), linear2.bias.numpy()),
             "bias should use the regularization in fluid.ParamAttr!")
...