Revert "add device attr for regularizer, test=develop (#24981)" (#25375)

This reverts commit ab5a1fb8.

Revert "add device attr for regularizer, test=develop (#24981)" (#25375)
This reverts commit ab5a1fb8.
8a68d2c2 · lilong12 · GitHub · 2d9dbd31 · 8a68d2c2 · 8a68d2c2
Hide whitespace changes
Inline | Side-by-side

Showing 2 changed files with 15 additions and 20 deletions

python/paddle/fluid/optimizer.py python/paddle/fluid/optimizer.py +9 -9

python/paddle/fluid/regularizer.py python/paddle/fluid/regularizer.py +6 -11

No files found.
--- a/python/paddle/fluid/optimizer.py
+++ b/python/paddle/fluid/optimizer.py
@@ -791,8 +791,8 @@ class Optimizer(object):
            params_grads = append_gradient_clip_ops(params_grads)
        # Add regularization if any
-        params_grads = append_regularization_ops(
+        params_grads = append_regularization_ops(params_grads,
-            params_grads, self.regularization, self._param_device_map)
+                                                 self.regularization)
        optimize_ops = self._create_optimization_pass(params_grads)
        return optimize_ops
@@ -1142,7 +1142,7 @@ class MomentumOptimizer(Optimizer):
 class DGCMomentumOptimizer(Optimizer):
    """
 	:api_attr: Static Graph
    DGC (Deep Gradient Compression) Momentum Optimizer. Original paper is https://arxiv.org/abs/1712.01887
@@ -3068,7 +3068,7 @@ Lamb = LambOptimizer
 class ModelAverage(Optimizer):
    """
 	:api_attr: Static Graph
    The ModelAverage optimizer accumulates specific continuous historical parameters
    during training. The accumulated historical range can be controlled by the passed
@@ -3377,7 +3377,7 @@ class ModelAverage(Optimizer):
 class ExponentialMovingAverage(object):
    """
 	:api_attr: Static Graph
    Compute the moving average of parameters with exponential decay.
    Given a parameter :math:`\\theta`, its exponential moving average (EMA)
@@ -3627,7 +3627,7 @@ class ExponentialMovingAverage(object):
 class PipelineOptimizer(object):
    """
 	:api_attr: Static Graph
    Pipeline Optimizer: Make a program to run as pipeline, that is splitting a
    program into multiple sections (sub-programs) and each section run on a
@@ -4478,7 +4478,7 @@ class PipelineOptimizer(object):
 class RecomputeOptimizer(Optimizer):
    """
 	:api_attr: Static Graph
    Recompute Optimizer Wrapper
@@ -4563,7 +4563,7 @@ class RecomputeOptimizer(Optimizer):
    def load(self, stat_dict):
        """
 	:api_attr: Static Graph
        load function is not supported by Recompute Optimizer for now.
        :return: None
@@ -4787,7 +4787,7 @@ class RecomputeOptimizer(Optimizer):
 class LookaheadOptimizer(object):
    """
 	:api_attr: Static Graph
    This implements the Lookahead optimizer of the
    paper : https://arxiv.org/abs/1907.08610.

--- a/python/paddle/fluid/regularizer.py
+++ b/python/paddle/fluid/regularizer.py
@@ -16,7 +16,7 @@ from __future__ import print_function
 import logging
 from . import framework
-from .framework import in_dygraph_mode, _varbase_creator, device_guard
+from .framework import in_dygraph_mode, _varbase_creator
 from . import core
 __all__ = ['L1Decay', 'L2Decay', 'L1DecayRegularizer', 'L2DecayRegularizer']
@@ -62,9 +62,7 @@ def _create_regularization_of_grad(param, grad, regularization=None):
    return new_grad
-def append_regularization_ops(parameters_and_grads,
+def append_regularization_ops(parameters_and_grads, regularization=None):
-                              regularization=None,
-                              param_device_map=None):
    """Create and add backward regularization Operators
    Creates and adds backward regularization operators in the BlockDesc.
@@ -95,19 +93,16 @@ def append_regularization_ops(parameters_and_grads,
        repeate_regularizer = False
        with framework.name_scope('regularization'):
            for param, grad in parameters_and_grads:
-                device = param_device_map[
-                    param.name] if param_device_map else None
                if not repeate_regularizer and param.regularizer is not None and regularization is not None:
                    repeate_regularizer = True
                    logging.info(
                        "If regularizer of a Parameter has been set by 'fluid.ParamAttr' or 'fluid.WeightNormParamAttr' already. "
                        "The Regularization[%s] in Optimizer will not take effect, and it will only be applied to other Parameters!"
                        % regularization.__str__())
-                with device_guard(device):
+                with param.block.program._optimized_guard([param, grad]):
-                    with param.block.program._optimized_guard([param, grad]):
+                    new_grad = _create_regularization_of_grad(param, grad,
-                        new_grad = _create_regularization_of_grad(
+                                                              regularization)
-                            param, grad, regularization)
+                    params_and_grads.append((param, new_grad))
-                        params_and_grads.append((param, new_grad))
    return params_and_grads