diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py index b6a959431827c2752239ef8af0a14df1eb5b6978..e66f640665e2ba9ca9aab51af3f65b50169de404 100644 --- a/python/paddle/fluid/optimizer.py +++ b/python/paddle/fluid/optimizer.py @@ -791,8 +791,8 @@ class Optimizer(object): params_grads = append_gradient_clip_ops(params_grads) # Add regularization if any - params_grads = append_regularization_ops( - params_grads, self.regularization, self._param_device_map) + params_grads = append_regularization_ops(params_grads, + self.regularization) optimize_ops = self._create_optimization_pass(params_grads) return optimize_ops @@ -1142,7 +1142,7 @@ class MomentumOptimizer(Optimizer): class DGCMomentumOptimizer(Optimizer): """ - :api_attr: Static Graph + :api_attr: Static Graph DGC (Deep Gradient Compression) Momentum Optimizer. Original paper is https://arxiv.org/abs/1712.01887 @@ -3068,7 +3068,7 @@ Lamb = LambOptimizer class ModelAverage(Optimizer): """ - :api_attr: Static Graph + :api_attr: Static Graph The ModelAverage optimizer accumulates specific continuous historical parameters during training. The accumulated historical range can be controlled by the passed @@ -3377,7 +3377,7 @@ class ModelAverage(Optimizer): class ExponentialMovingAverage(object): """ - :api_attr: Static Graph + :api_attr: Static Graph Compute the moving average of parameters with exponential decay. Given a parameter :math:`\\theta`, its exponential moving average (EMA) @@ -3627,7 +3627,7 @@ class ExponentialMovingAverage(object): class PipelineOptimizer(object): """ - :api_attr: Static Graph + :api_attr: Static Graph Pipeline Optimizer: Make a program to run as pipeline, that is splitting a program into multiple sections (sub-programs) and each section run on a @@ -4478,7 +4478,7 @@ class PipelineOptimizer(object): class RecomputeOptimizer(Optimizer): """ - :api_attr: Static Graph + :api_attr: Static Graph Recompute Optimizer Wrapper @@ -4563,7 +4563,7 @@ class RecomputeOptimizer(Optimizer): def load(self, stat_dict): """ - :api_attr: Static Graph + :api_attr: Static Graph load function is not supported by Recompute Optimizer for now. :return: None @@ -4787,7 +4787,7 @@ class RecomputeOptimizer(Optimizer): class LookaheadOptimizer(object): """ - :api_attr: Static Graph + :api_attr: Static Graph This implements the Lookahead optimizer of the paper : https://arxiv.org/abs/1907.08610. diff --git a/python/paddle/fluid/regularizer.py b/python/paddle/fluid/regularizer.py index 2d411be19a4b234e325836c3e3b70872db4f81fd..9fe24ec2c9d87d1c82f8a3fbd771c714ad376aad 100644 --- a/python/paddle/fluid/regularizer.py +++ b/python/paddle/fluid/regularizer.py @@ -16,7 +16,7 @@ from __future__ import print_function import logging from . import framework -from .framework import in_dygraph_mode, _varbase_creator, device_guard +from .framework import in_dygraph_mode, _varbase_creator from . import core __all__ = ['L1Decay', 'L2Decay', 'L1DecayRegularizer', 'L2DecayRegularizer'] @@ -62,9 +62,7 @@ def _create_regularization_of_grad(param, grad, regularization=None): return new_grad -def append_regularization_ops(parameters_and_grads, - regularization=None, - param_device_map=None): +def append_regularization_ops(parameters_and_grads, regularization=None): """Create and add backward regularization Operators Creates and adds backward regularization operators in the BlockDesc. @@ -95,19 +93,16 @@ def append_regularization_ops(parameters_and_grads, repeate_regularizer = False with framework.name_scope('regularization'): for param, grad in parameters_and_grads: - device = param_device_map[ - param.name] if param_device_map else None if not repeate_regularizer and param.regularizer is not None and regularization is not None: repeate_regularizer = True logging.info( "If regularizer of a Parameter has been set by 'fluid.ParamAttr' or 'fluid.WeightNormParamAttr' already. " "The Regularization[%s] in Optimizer will not take effect, and it will only be applied to other Parameters!" % regularization.__str__()) - with device_guard(device): - with param.block.program._optimized_guard([param, grad]): - new_grad = _create_regularization_of_grad( - param, grad, regularization) - params_and_grads.append((param, new_grad)) + with param.block.program._optimized_guard([param, grad]): + new_grad = _create_regularization_of_grad(param, grad, + regularization) + params_and_grads.append((param, new_grad)) return params_and_grads