From 6fb7e111c9c446e848ab9320f42b08db660145b1 Mon Sep 17 00:00:00 2001
From: Weilong Wu
Date: Thu, 23 Feb 2023 19:12:55 +0800
Subject: [PATCH] Revert "refine optimizer create accumulators (#50188)"

This reverts commit 244e75466a41a997ff0563f598c0fb782485ac19.

---
 python/paddle/optimizer/adadelta.py  | 3 ---
 python/paddle/optimizer/adagrad.py   | 3 ---
 python/paddle/optimizer/adam.py      | 4 ----
 python/paddle/optimizer/adamax.py    | 3 ---
 python/paddle/optimizer/adamw.py     | 5 -----
 python/paddle/optimizer/lamb.py      | 4 ----
 python/paddle/optimizer/momentum.py  | 4 ----
 python/paddle/optimizer/optimizer.py | 1 -
 python/paddle/optimizer/rmsprop.py   | 3 ---
 python/paddle/optimizer/sgd.py       | 3 ---
 10 files changed, 33 deletions(-)

diff --git a/python/paddle/optimizer/adadelta.py b/python/paddle/optimizer/adadelta.py
index 5ece3282678..ff0f0a13fed 100644
--- a/python/paddle/optimizer/adadelta.py
+++ b/python/paddle/optimizer/adadelta.py
@@ -145,11 +145,8 @@ class Adadelta(Optimizer):
             parameters = parameters.get('params')
 
         for p in parameters:
-            if p.name in self._already_create_accumulater:
-                continue
             self._add_accumulator(self._avg_squared_grad_acc_str, p)
             self._add_accumulator(self._avg_squared_update_acc_str, p)
-            self._already_create_accumulater.add(p.name)
 
     def _append_optimize_op(self, block, param_and_grad):
         if isinstance(param_and_grad, dict):
diff --git a/python/paddle/optimizer/adagrad.py b/python/paddle/optimizer/adagrad.py
index ff65c86dbeb..6bea5773270 100644
--- a/python/paddle/optimizer/adagrad.py
+++ b/python/paddle/optimizer/adagrad.py
@@ -139,14 +139,11 @@ class Adagrad(Optimizer):
             parameters = self._update_param_group(parameters)
 
         for p in parameters:
-            if p.name in self._already_create_accumulater:
-                continue
             self._add_accumulator(
                 self._moment_acc_str,
                 p,
                 fill_value=self.initial_accumulator_value,
             )
-            self._already_create_accumulater.add(p.name)
 
     def _append_optimize_op(self, block, param_and_grad):
         assert isinstance(block, framework.Block)
diff --git a/python/paddle/optimizer/adam.py b/python/paddle/optimizer/adam.py
index 0e33cac29c7..9c827496e8b 100644
--- a/python/paddle/optimizer/adam.py
+++ b/python/paddle/optimizer/adam.py
@@ -317,12 +317,9 @@ class Adam(Optimizer):
 
         # Create accumulator tensors for first and second moments
         for p in parameters:
-            if p.name in self._already_create_accumulater:
-                continue
             if self._multi_precision and self._is_dtype_fp16_or_bf16(p.dtype):
                 master_p = self._create_master_weight(p)
                 self._add_moments_pows(master_p)
-                self._already_create_accumulater.add(p.name)
                 continue
             if (
                 self._is_dtype_fp16_or_bf16(p.dtype)
@@ -333,7 +330,6 @@ class Adam(Optimizer):
                     "Consider using multi_precision=True option of the Adam optimizer."
                 )
             self._add_moments_pows(p)
-            self._already_create_accumulater.add(p.name)
 
     def _append_optimize_op(self, block, param_and_grad):
         assert isinstance(block, framework.Block)
diff --git a/python/paddle/optimizer/adamax.py b/python/paddle/optimizer/adamax.py
index 5409a05787d..c460ab6be03 100644
--- a/python/paddle/optimizer/adamax.py
+++ b/python/paddle/optimizer/adamax.py
@@ -176,8 +176,6 @@ class Adamax(Optimizer):
 
         # Create accumulator tensors for first moment and infinity norm
         for p in parameters:
-            if p.name in self._already_create_accumulater:
-                continue
             self._add_accumulator(self._moment_acc_str, p)
             self._add_accumulator(self._inf_norm_acc_str, p)
             self._add_accumulator(
@@ -186,7 +184,6 @@ class Adamax(Optimizer):
                 fill_value=self._beta1,
                 shape=[1],
             )
-            self._already_create_accumulater.add(p.name)
 
     def _append_optimize_op(self, block, param_and_grad):
         assert isinstance(block, framework.Block)
diff --git a/python/paddle/optimizer/adamw.py b/python/paddle/optimizer/adamw.py
index ddacd33527e..5a75e6d2436 100644
--- a/python/paddle/optimizer/adamw.py
+++ b/python/paddle/optimizer/adamw.py
@@ -281,7 +280,6 @@ class AdamW(Optimizer):
         self._use_multi_tensor = None
         self.regularization = None
         self._auxiliary_vars = {}
-        self._already_create_accumulater = set()
 
     def _set_auxiliary_var(self, key, val):
         self._auxiliary_vars[key] = val
@@ -423,12 +422,9 @@ class AdamW(Optimizer):
 
         # Create accumulator tensors for first and second moments
        for p in parameters:
-            if p.name in self._already_create_accumulater:
-                continue
             if self._multi_precision and self._is_dtype_fp16_or_bf16(p.dtype):
                 master_p = self._create_master_weight(p)
                 self._add_moments_pows(master_p)
-                self._already_create_accumulater.add(p.name)
                 continue
             if (
                 self._is_dtype_fp16_or_bf16(p.dtype)
@@ -439,7 +435,6 @@ class AdamW(Optimizer):
                     "Consider using multi_precision=True option of the Adam optimizer."
                 )
             self._add_moments_pows(p)
-            self._already_create_accumulater.add(p.name)
 
     def _append_optimize_op(self, block, param_and_grad):
         assert isinstance(block, framework.Block)
diff --git a/python/paddle/optimizer/lamb.py b/python/paddle/optimizer/lamb.py
index a577e7c0771..57904cd44a8 100644
--- a/python/paddle/optimizer/lamb.py
+++ b/python/paddle/optimizer/lamb.py
@@ -190,15 +190,11 @@ class Lamb(Optimizer):
 
         # Create accumulator tensors for first and second moments
         for p in parameters:
-            if p.name in self._already_create_accumulater:
-                continue
             if self._multi_precision and p.dtype == core.VarDesc.VarType.FP16:
                 master_p = self._create_master_weight(p)
                 self._add_moments_pows(master_p)
-                self._already_create_accumulater.add(p.name)
             else:
                 self._add_moments_pows(p)
-                self._already_create_accumulater.add(p.name)
 
     def _get_accumulator(self, name, param):
         """Utility function to fetch an accumulator for a parameter
diff --git a/python/paddle/optimizer/momentum.py b/python/paddle/optimizer/momentum.py
index 07839bbe3ef..bff9c1209e7 100644
--- a/python/paddle/optimizer/momentum.py
+++ b/python/paddle/optimizer/momentum.py
@@ -270,12 +270,9 @@ class Momentum(Optimizer):
             parameters = self._update_param_group(parameters)
 
         for p in parameters:
-            if p.name in self._already_create_accumulater:
-                continue
             if self._multi_precision and p.dtype == core.VarDesc.VarType.FP16:
                 master_p = self._create_master_weight(p)
                 self._add_accumulator(self._velocity_acc_str, master_p)
-                self._already_create_accumulater.add(p.name)
                 continue
             if (
                 p.dtype == core.VarDesc.VarType.FP16
@@ -286,7 +283,6 @@ class Momentum(Optimizer):
                     "Consider using multi_precision=True option of the Momentum optimizer."
                 )
             self._add_accumulator(self._velocity_acc_str, p)
-            self._already_create_accumulater.add(p.name)
 
     def _create_regularization_of_grad(self, param, grad, regularization=None):
         """Create and add backward regularization Operators
diff --git a/python/paddle/optimizer/optimizer.py b/python/paddle/optimizer/optimizer.py
index ef177c52a28..d9e1cd45604 100644
--- a/python/paddle/optimizer/optimizer.py
+++ b/python/paddle/optimizer/optimizer.py
@@ -275,7 +275,6 @@ class Optimizer:
 
         self._param_dict = self._create_multi_tensor_dict()
         self._auxiliary_vars = {}
-        self._already_create_accumulater = set()
 
     def _set_auxiliary_var(self, key, val):
         self._auxiliary_vars[key] = val
diff --git a/python/paddle/optimizer/rmsprop.py b/python/paddle/optimizer/rmsprop.py
index ae342d4c021..855082eae5f 100644
--- a/python/paddle/optimizer/rmsprop.py
+++ b/python/paddle/optimizer/rmsprop.py
@@ -199,12 +199,9 @@ class RMSProp(Optimizer):
             parameters = parameters.get('params')
 
         for p in parameters:
-            if p.name in self._already_create_accumulater:
-                continue
             self._add_accumulator(self._momentum_acc_str, p)
             self._add_accumulator(self._mean_square_acc_str, p)
             self._add_accumulator(self._mean_grad_acc_str, p)
-            self._already_create_accumulater.add(p.name)
 
     def _append_optimize_op(self, block, param_and_grad):
         if not isinstance(block, framework.Block):
diff --git a/python/paddle/optimizer/sgd.py b/python/paddle/optimizer/sgd.py
index d65857f1dd4..c188cd15a8c 100644
--- a/python/paddle/optimizer/sgd.py
+++ b/python/paddle/optimizer/sgd.py
@@ -129,11 +129,8 @@ class SGD(Optimizer):
 
         # Create accumulator tensors for first and second moments
         for p in parameters:
-            if p.name in self._already_create_accumulater:
-                continue
             if self._multi_precision and p.dtype == core.VarDesc.VarType.FP16:
                 master_p = self._create_master_weight(p)
-                self._already_create_accumulater.add(p.name)
                 continue
             if (
                 p.dtype == core.VarDesc.VarType.FP16
-- 
GitLab
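
Note, placed after the patch trailer rather than inside the diff: every removed hunk above is the same dedup guard from #50188, a set of parameter names (`_already_create_accumulater`) that lets `_create_accumulators` skip parameters whose accumulator tensors were already created on an earlier call. The sketch below is only a minimal illustration of that guard pattern under assumed names: `SketchOptimizer`, its dict-based accumulator storage, and the parameter names "w"/"b" are invented for this example and are not the real paddle.optimizer.Optimizer API.

# Minimal sketch of the reverted guard pattern; illustration only, not Paddle code.
class SketchOptimizer:
    def __init__(self):
        # stand-in for Paddle's per-parameter accumulator storage
        self._accumulators = {}
        # the set introduced by #50188 and removed again by this revert
        self._already_create_accumulater = set()

    def _add_accumulator(self, name, param_name, fill_value=0.0):
        self._accumulators.setdefault(param_name, {})[name] = fill_value

    def _create_accumulators(self, parameters):
        for p_name in parameters:
            # guard: do not recreate (and reset) accumulators on repeated calls
            if p_name in self._already_create_accumulater:
                continue
            self._add_accumulator("moment", p_name)
            self._already_create_accumulater.add(p_name)

opt = SketchOptimizer()
opt._create_accumulators(["w", "b"])
opt._create_accumulators(["w", "b"])  # second call is a no-op because of the guard
print(opt._accumulators)              # {'w': {'moment': 0.0}, 'b': {'moment': 0.0}}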