Unverified commit a2603c5b, authored by Aurelius84, committed by GitHub

Remove _optimized_guard of append_regularization_ops in Dygraph mode (#22271)

* polish regularization test=develop

* add comment of function test=develop

* rm name_scope in dygraph mode test=develop
Parent commit: faba4b11
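What the change does, in short: in dygraph (imperative) mode there is no static Program/BlockDesc being assembled, so wrapping every parameter's regularization in `param.block.program._optimized_guard(...)` and `framework.name_scope('regularization')` only adds per-step overhead. The rewrite below checks the execution mode once and keeps those scopes only on the static-graph path. A minimal sketch of that dispatch pattern follows; the helper name is hypothetical, and only `in_dygraph_mode` (from `paddle.fluid.framework`) is assumed to be the real API:

from paddle.fluid.framework import in_dygraph_mode

def _regularize_with_optional_scopes(work, param, grad):
    # Hypothetical illustration of the dispatch used in this commit,
    # not code from the repository.
    if in_dygraph_mode():
        # Imperative execution: no Program to annotate, do the work directly.
        return work(param, grad)
    # Static-graph execution: keep the bookkeeping guard around the added ops.
    with param.block.program._optimized_guard([param, grad]):
        return work(param, grad)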
@@ -21,35 +21,14 @@ from . import core
 __all__ = ['L1Decay', 'L2Decay', 'L1DecayRegularizer', 'L2DecayRegularizer']
-def append_regularization_ops(parameters_and_grads, regularization=None):
-    """Create and add backward regularization Operators
-
-    Creates and adds backward regularization operators in the BlockDesc.
-    This will add gradients of the regularizer function to the gradients
-    of the parameters and return these modified gradients. This is the
-    same as implementing weight decay in optimizers for regularization.
-
-    Args:
-        parameters_and_grads: A list of (parameters, gradients) pairs
-                              that need to be regularized.
-        regularization: A global regularizer. If the parameter is not
-                        set. It will be applied with regularizer.
+def _create_regularization_of_grad(param, grad, regularization=None):
+    """ Create and add backward regularization Operators
-
-    Returns:
-        list[(Variable, Variable)]: list of (parameters, gradients) \
-        pair with the regularized gradient
-
-    Raises:
-        Exception: Unknown regularization type
+
+    Function helper of append_regularization_ops.
     """
-    params_and_grads = []
-    for param, grad in parameters_and_grads:
-        # If no gradient then we don't need to do anything
-        if grad is None:
-            params_and_grads.append((param, grad))
-            continue
-        with param.block.program._optimized_guard(
-                [param, grad]), framework.name_scope('regularization'):
+    # If no gradient or no regularization is specified, then we don't need to do anything
+    if grad is None or (param.regularizer is None and regularization is None):
+        return grad
     regularization_term = None
     if param.regularizer is not None:
         # Add variable for regularization term in grad block
@@ -57,10 +36,7 @@ def append_regularization_ops(parameters_and_grads, regularization=None):
     elif regularization is not None:
         regularization_term = regularization(param, grad, grad.block)
-    # If no regularization specified, then we don't need to do anything
-    if regularization_term is None:
-        params_and_grads.append((param, grad))
-        continue
+    assert regularization_term is not None
     new_grad = grad
     if grad.type == core.VarDesc.VarType.SELECTED_ROWS:
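The lines elided between this hunk and the next build the output variable and the op inputs for combining the gradient with the regularization term; only the final `sum` op is visible below. A rough sketch of that elided dense-gradient path, with every name treated as an assumption rather than the file's actual code:

# Illustrative only: feed the original gradient and the regularization term
# into a `sum` op whose output slot is the regularized gradient.
inputs = {"X": [grad, regularization_term]}
outputs = {"Out": [new_grad]}
grad.block.append_op(type='sum', inputs=inputs, outputs=outputs)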
@@ -82,8 +58,43 @@ def append_regularization_ops(parameters_and_grads, regularization=None):
     else:
         grad.block.append_op(type='sum', inputs=inputs, outputs=outputs)
-    params_and_grads.append((param, new_grad))
+    return new_grad
+
+
+def append_regularization_ops(parameters_and_grads, regularization=None):
+    """Create and add backward regularization Operators
+
+    Creates and adds backward regularization operators in the BlockDesc.
+    This will add gradients of the regularizer function to the gradients
+    of the parameters and return these modified gradients. This is the
+    same as implementing weight decay in optimizers for regularization.
+
+    Args:
+        parameters_and_grads: A list of (parameters, gradients) pairs
+                              that need to be regularized.
+        regularization: A global regularizer. If the parameter is not
+                        set. It will be applied with regularizer.
+
+    Returns:
+        list[(Variable, Variable)]: list of (parameters, gradients) \
+        pair with the regularized gradient
+
+    Raises:
+        Exception: Unknown regularization type
+    """
+    params_and_grads = []
+    if in_dygraph_mode():
+        for param, grad in parameters_and_grads:
+            new_grad = _create_regularization_of_grad(param, grad,
+                                                      regularization)
+            params_and_grads.append((param, new_grad))
+    else:
+        with framework.name_scope('regularization'):
+            for param, grad in parameters_and_grads:
+                with param.block.program._optimized_guard([param, grad]):
+                    new_grad = _create_regularization_of_grad(param, grad,
+                                                              regularization)
+                    params_and_grads.append((param, new_grad))
+    return params_and_grads
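For context on where this function sits (not part of this diff): the fluid optimizers route their collected (param, grad) pairs and the optimizer-level regularization setting through append_regularization_ops before building the optimize ops, and the helper above gives a per-parameter regularizer precedence over that global setting, since param.regularizer is checked first. A hedged sketch of the call shape, since the optimizer-side code is not shown in this commit:

# Assumed call site inside a fluid optimizer (illustrative, not from this diff):
params_grads = append_regularization_ops(params_grads, self.regularization)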