diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py
index 4625d7ea89b25e75c2cbdc88f273882806080ca1..24076e82b0365d21e7222a16cbc3d3462699f119 100755
--- a/python/paddle/fluid/optimizer.py
+++ b/python/paddle/fluid/optimizer.py
@@ -2047,15 +2047,11 @@ class LarsMomentumOptimizer(Optimizer):
     def _append_optimize_op(self, block, param_and_grad):
         assert isinstance(block, framework.Block)
         _lars_weight_decay = self._lars_weight_decay
-        _lars_coeff = self._lars_coeff
         param_name = param_and_grad[0].name
-        is_excluded = False
         if len(self._exclude_from_weight_decay) > 0:
             for name in self._exclude_from_weight_decay:
                 if name in param_name:
                     _lars_weight_decay = 0.0
-                    _lars_coeff = 0.0
-                    is_excluded = True
                     break

         velocity_acc = self._get_accumulator(self._velocity_acc_str,
@@ -2069,7 +2065,7 @@ class LarsMomentumOptimizer(Optimizer):

         attrs = {
             "mu": self._momentum,
-            "lars_coeff": _lars_coeff,
+            "lars_coeff": self._lars_coeff,
             "lars_weight_decay": _lars_weight_decay,
             "multi_precision": find_master,
             "rescale_grad": self._rescale_grad
@@ -2090,7 +2086,7 @@ class LarsMomentumOptimizer(Optimizer):

         # create the momentum optimize op
         momentum_op = block.append_op(
-            type='momentum' if is_excluded else self.type,
+            type=self.type,
             inputs=inputs,
             outputs=outputs,
             attrs=attrs,
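
The net effect of this diff: parameters matched by `_exclude_from_weight_decay` no longer fall back to the plain `momentum` op with a zeroed `lars_coeff`; they stay on the LARS op and only have their `lars_weight_decay` set to 0.0. As a minimal standalone sketch of that resolved behavior (hypothetical helper, not Paddle code; `resolve_lars_attrs` and its arguments are illustrative names):

    # Sketch: resolve per-parameter LARS attributes after this change.
    # Excluded parameters keep the LARS op and its coefficient; only the
    # weight decay is disabled.
    def resolve_lars_attrs(param_name, lars_coeff, lars_weight_decay,
                           exclude_from_weight_decay):
        """Return the effective (lars_coeff, lars_weight_decay) for one param."""
        for name in exclude_from_weight_decay:
            if name in param_name:
                # Substring match, mirroring `if name in param_name` in the
                # diff: decay is zeroed, trust-ratio scaling still applies.
                return lars_coeff, 0.0
        return lars_coeff, lars_weight_decay

    if __name__ == "__main__":
        # Bias and batch-norm parameters are commonly excluded from decay.
        print(resolve_lars_attrs("fc_0.b_0", 0.001, 0.0005, ["b_0", "bn"]))
        # -> (0.001, 0.0)
        print(resolve_lars_attrs("fc_0.w_0", 0.001, 0.0005, ["b_0", "bn"]))
        # -> (0.001, 0.0005)

Because every parameter now goes through the same `self.type` op, the `is_excluded` flag and the per-parameter `_lars_coeff` shadow variable become dead code, which is why the diff deletes them.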