Commit 6fb7e111 authored by Weilong Wu, committed by GitHub

Revert "refine optimizer create accumulators (#50188)"

This reverts commit 244e7546.
Parent a1e96e47
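
For context: the reverted change (#50188) introduced a bookkeeping set, `_already_create_accumulater`, so that `_create_accumulators` would skip parameters whose accumulator tensors had already been created by an earlier call. This revert removes that set and all of its reads and writes, so each optimizer's `_create_accumulators` once again calls `_add_accumulator`/`_add_moments_pows` unconditionally for every parameter it is given. The snippet below is a minimal, self-contained sketch of the guard pattern being removed, not the Paddle implementation; the `Param` stand-in and the in-memory accumulator dict are illustrative only.

    from collections import namedtuple

    Param = namedtuple("Param", ["name"])  # illustrative stand-in for a Paddle parameter

    class SketchOptimizer:
        def __init__(self):
            self._accumulators = {}                   # (acc_name, param_name) -> value
            self._already_create_accumulater = set()  # bookkeeping set removed by this revert

        def _add_accumulator(self, name, p):
            # Stand-in for Optimizer._add_accumulator: register one accumulator per (name, param).
            self._accumulators[(name, p.name)] = 0.0

        def _create_accumulators(self, block, parameters):
            for p in parameters:
                # Guard added by #50188 and removed by this revert: skip parameters
                # whose accumulators were already created by a previous call.
                if p.name in self._already_create_accumulater:
                    continue
                self._add_accumulator("moment", p)
                self._already_create_accumulater.add(p.name)

    opt = SketchOptimizer()
    params = [Param("w"), Param("b")]
    opt._create_accumulators(None, params)
    opt._create_accumulators(None, params)  # with the guard, the second call is a no-op
    assert len(opt._accumulators) == 2
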
@@ -145,11 +145,8 @@ class Adadelta(Optimizer):
             parameters = parameters.get('params')

         for p in parameters:
-            if p.name in self._already_create_accumulater:
-                continue
             self._add_accumulator(self._avg_squared_grad_acc_str, p)
             self._add_accumulator(self._avg_squared_update_acc_str, p)
-            self._already_create_accumulater.add(p.name)

     def _append_optimize_op(self, block, param_and_grad):
         if isinstance(param_and_grad, dict):
...
@@ -139,14 +139,11 @@ class Adagrad(Optimizer):
             parameters = self._update_param_group(parameters)

         for p in parameters:
-            if p.name in self._already_create_accumulater:
-                continue
             self._add_accumulator(
                 self._moment_acc_str,
                 p,
                 fill_value=self.initial_accumulator_value,
             )
-            self._already_create_accumulater.add(p.name)

     def _append_optimize_op(self, block, param_and_grad):
         assert isinstance(block, framework.Block)
...
@@ -317,12 +317,9 @@ class Adam(Optimizer):

         # Create accumulator tensors for first and second moments
         for p in parameters:
-            if p.name in self._already_create_accumulater:
-                continue
             if self._multi_precision and self._is_dtype_fp16_or_bf16(p.dtype):
                 master_p = self._create_master_weight(p)
                 self._add_moments_pows(master_p)
-                self._already_create_accumulater.add(p.name)
                 continue
             if (
                 self._is_dtype_fp16_or_bf16(p.dtype)
@@ -333,7 +330,6 @@ class Adam(Optimizer):
                     "Consider using multi_precision=True option of the Adam optimizer."
                 )
             self._add_moments_pows(p)
-            self._already_create_accumulater.add(p.name)

     def _append_optimize_op(self, block, param_and_grad):
         assert isinstance(block, framework.Block)
...
@@ -176,8 +176,6 @@ class Adamax(Optimizer):

         # Create accumulator tensors for first moment and infinity norm
         for p in parameters:
-            if p.name in self._already_create_accumulater:
-                continue
             self._add_accumulator(self._moment_acc_str, p)
             self._add_accumulator(self._inf_norm_acc_str, p)
             self._add_accumulator(
@@ -186,7 +184,6 @@ class Adamax(Optimizer):
                 fill_value=self._beta1,
                 shape=[1],
             )
-            self._already_create_accumulater.add(p.name)

     def _append_optimize_op(self, block, param_and_grad):
         assert isinstance(block, framework.Block)
...
@@ -281,7 +281,6 @@ class AdamW(Optimizer):
         self._use_multi_tensor = None
         self.regularization = None
         self._auxiliary_vars = {}
-        self._already_create_accumulater = set()

     def _set_auxiliary_var(self, key, val):
         self._auxiliary_vars[key] = val
@@ -423,12 +422,9 @@ class AdamW(Optimizer):

         # Create accumulator tensors for first and second moments
         for p in parameters:
-            if p.name in self._already_create_accumulater:
-                continue
             if self._multi_precision and self._is_dtype_fp16_or_bf16(p.dtype):
                 master_p = self._create_master_weight(p)
                 self._add_moments_pows(master_p)
-                self._already_create_accumulater.add(p.name)
                 continue
             if (
                 self._is_dtype_fp16_or_bf16(p.dtype)
@@ -439,7 +435,6 @@ class AdamW(Optimizer):
                     "Consider using multi_precision=True option of the Adam optimizer."
                 )
             self._add_moments_pows(p)
-            self._already_create_accumulater.add(p.name)

     def _append_optimize_op(self, block, param_and_grad):
         assert isinstance(block, framework.Block)
...
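
The Adam and AdamW hunks above share the same structure: when `multi_precision` is enabled and a parameter is FP16/BF16, the moment accumulators are attached to an FP32 master weight created by `_create_master_weight`; otherwise a warning is issued and the accumulators are attached to the parameter itself. A condensed sketch of that branch structure after the revert follows; the helper methods on `opt` are the ones named in the diff, while the free function and the abridged warning text are illustrative only.

    import warnings

    def create_adam_accumulators(opt, parameters):
        # Condensed, illustrative view of Adam/AdamW._create_accumulators after
        # this revert (no _already_create_accumulater bookkeeping remains).
        for p in parameters:
            if opt._multi_precision and opt._is_dtype_fp16_or_bf16(p.dtype):
                master_p = opt._create_master_weight(p)  # FP32 copy of the parameter
                opt._add_moments_pows(master_p)          # moments/pows track the master weight
                continue
            if opt._is_dtype_fp16_or_bf16(p.dtype):
                # Reached only when multi_precision is off, because of the continue above.
                warnings.warn(
                    "Consider using multi_precision=True option of the Adam optimizer."
                )
            opt._add_moments_pows(p)                     # moments/pows track the raw parameter
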
@@ -190,15 +190,11 @@ class Lamb(Optimizer):

         # Create accumulator tensors for first and second moments
         for p in parameters:
-            if p.name in self._already_create_accumulater:
-                continue
             if self._multi_precision and p.dtype == core.VarDesc.VarType.FP16:
                 master_p = self._create_master_weight(p)
                 self._add_moments_pows(master_p)
-                self._already_create_accumulater.add(p.name)
             else:
                 self._add_moments_pows(p)
-                self._already_create_accumulater.add(p.name)

     def _get_accumulator(self, name, param):
         """Utility function to fetch an accumulator for a parameter
...
@@ -270,12 +270,9 @@ class Momentum(Optimizer):
             parameters = self._update_param_group(parameters)

         for p in parameters:
-            if p.name in self._already_create_accumulater:
-                continue
             if self._multi_precision and p.dtype == core.VarDesc.VarType.FP16:
                 master_p = self._create_master_weight(p)
                 self._add_accumulator(self._velocity_acc_str, master_p)
-                self._already_create_accumulater.add(p.name)
                 continue
             if (
                 p.dtype == core.VarDesc.VarType.FP16
@@ -286,7 +283,6 @@ class Momentum(Optimizer):
                     "Consider using multi_precision=True option of the Momentum optimizer."
                 )
             self._add_accumulator(self._velocity_acc_str, p)
-            self._already_create_accumulater.add(p.name)

     def _create_regularization_of_grad(self, param, grad, regularization=None):
         """Create and add backward regularization Operators
...
@@ -275,7 +275,6 @@ class Optimizer:

         self._param_dict = self._create_multi_tensor_dict()
         self._auxiliary_vars = {}
-        self._already_create_accumulater = set()

     def _set_auxiliary_var(self, key, val):
         self._auxiliary_vars[key] = val
...
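
The two `__init__` hunks (in `AdamW` earlier and in the `Optimizer` base class here) are the only places the tracking set was created; every other hunk in this revert just drops reads and writes of it. A minimal sketch of that layout, where only the attribute names come from the diff and the rest is illustrative:

    class Optimizer:
        # Simplified stand-in for the Paddle base class: with this revert the
        # tracking-set line below disappears, so subclasses no longer inherit it.
        def __init__(self):
            self._auxiliary_vars = {}
            self._already_create_accumulater = set()   # removed by this revert

    class AdamW(Optimizer):
        # Per the AdamW hunk, AdamW's own __init__ also carried (and now loses)
        # the same bookkeeping set alongside the attributes shown in the diff.
        def __init__(self):
            self._use_multi_tensor = None
            self.regularization = None
            self._auxiliary_vars = {}
            self._already_create_accumulater = set()   # removed by this revert
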
@@ -199,12 +199,9 @@ class RMSProp(Optimizer):
             parameters = parameters.get('params')

         for p in parameters:
-            if p.name in self._already_create_accumulater:
-                continue
             self._add_accumulator(self._momentum_acc_str, p)
             self._add_accumulator(self._mean_square_acc_str, p)
             self._add_accumulator(self._mean_grad_acc_str, p)
-            self._already_create_accumulater.add(p.name)

     def _append_optimize_op(self, block, param_and_grad):
         if not isinstance(block, framework.Block):
...
@@ -129,11 +129,8 @@ class SGD(Optimizer):

         # Create accumulator tensors for first and second moments
         for p in parameters:
-            if p.name in self._already_create_accumulater:
-                continue
             if self._multi_precision and p.dtype == core.VarDesc.VarType.FP16:
                 master_p = self._create_master_weight(p)
-                self._already_create_accumulater.add(p.name)
                 continue
             if (
                 p.dtype == core.VarDesc.VarType.FP16
...