未验证 提交 6c9fa665 作者: wanghuancoder 提交者: GitHub

delete legacy dygraph code in python/paddle/optimizer (#49308)

上级 983ae1d7
...@@ -170,7 +170,7 @@ class Adadelta(Optimizer): ...@@ -170,7 +170,7 @@ class Adadelta(Optimizer):
self._epsilon, self._epsilon,
) )
return None return None
else:
if not isinstance(block, framework.Block): if not isinstance(block, framework.Block):
raise TypeError("block is not instance of framework.Block.") raise TypeError("block is not instance of framework.Block.")
......
...@@ -16,7 +16,7 @@ import warnings ...@@ -16,7 +16,7 @@ import warnings
from collections import defaultdict from collections import defaultdict
import paddle import paddle
from paddle import _C_ops, _legacy_C_ops from paddle import _C_ops
from ..fluid import core, framework, unique_name from ..fluid import core, framework, unique_name
from ..fluid.dygraph import base as imperative_base from ..fluid.dygraph import base as imperative_base
...@@ -393,50 +393,7 @@ class Adam(Optimizer): ...@@ -393,50 +393,7 @@ class Adam(Optimizer):
) )
return None return None
else:
if framework._in_legacy_dygraph():
_beta1 = (
self._beta1
if not isinstance(self._beta1, Variable)
else self._beta1.numpy().item(0)
)
_beta2 = (
self._beta2
if not isinstance(self._beta2, Variable)
else self._beta2.numpy().item(0)
)
_, _, _, _, _, _ = _legacy_C_ops.adam(
param_and_grad[0],
param_and_grad[1],
lr,
moment1,
moment2,
beta1_pow_acc,
beta2_pow_acc,
master_weight,
param_and_grad[0],
moment1,
moment2,
beta1_pow_acc,
beta2_pow_acc,
master_weight,
'epsilon',
self._epsilon,
'lazy_mode',
self._lazy_mode,
'min_row_size_to_use_multithread',
1000,
'beta1',
_beta1,
'beta2',
_beta2,
'multi_precision',
find_master,
)
return None
inputs = { inputs = {
"Param": [param_and_grad[0]], "Param": [param_and_grad[0]],
"Grad": [param_and_grad[1]], "Grad": [param_and_grad[1]],
...@@ -729,15 +686,13 @@ class Adam(Optimizer): ...@@ -729,15 +686,13 @@ class Adam(Optimizer):
else self._beta2.numpy().item(0) else self._beta2.numpy().item(0)
) )
if framework._non_static_mode(): if framework.in_dygraph_mode():
master_weight = self._master_weight_dict[key] master_weight = self._master_weight_dict[key]
master_weight = ( master_weight = (
master_weight[param_group_idx] master_weight[param_group_idx]
if master_weight is not None if master_weight is not None
else None else None
) )
if in_dygraph_mode():
_, _, _, _, _, _ = _C_ops.merged_adam_( _, _, _, _, _, _ = _C_ops.merged_adam_(
self._param_dict[key][param_group_idx], self._param_dict[key][param_group_idx],
grad_dict[key], grad_dict[key],
...@@ -753,31 +708,6 @@ class Adam(Optimizer): ...@@ -753,31 +708,6 @@ class Adam(Optimizer):
find_master, find_master,
False, False,
) )
else:
_, _, _, _, _, _ = _legacy_C_ops.merged_adam(
self._param_dict[key][param_group_idx],
grad_dict[key],
lr_dict[key],
self._moment1_dict[key][param_group_idx],
self._moment2_dict[key][param_group_idx],
self._beta1_pow_acc_dict[key][param_group_idx],
self._beta2_pow_acc_dict[key][param_group_idx],
master_weight,
self._param_dict[key][param_group_idx],
self._moment1_dict[key][param_group_idx],
self._moment2_dict[key][param_group_idx],
self._beta1_pow_acc_dict[key][param_group_idx],
self._beta2_pow_acc_dict[key][param_group_idx],
master_weight,
'epsilon',
self._epsilon,
'beta1',
_beta1,
'beta2',
_beta2,
'multi_precision',
find_master,
)
else: else:
inputs = { inputs = {
"Param": self._param_dict[key][param_group_idx], "Param": self._param_dict[key][param_group_idx],
......
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from paddle import _C_ops, _legacy_C_ops from paddle import _C_ops
from ..fluid import framework from ..fluid import framework
from ..fluid.dygraph import no_grad from ..fluid.dygraph import no_grad
...@@ -210,24 +210,6 @@ class Adamax(Optimizer): ...@@ -210,24 +210,6 @@ class Adamax(Optimizer):
self._beta2, self._beta2,
self._epsilon, self._epsilon,
) )
elif framework._in_legacy_dygraph():
_legacy_C_ops.adamax(
param_and_grad[0],
param_and_grad[1],
self._create_param_lr(param_and_grad),
moment,
inf_norm,
beta1_pow_acc,
param_and_grad[0],
moment,
inf_norm,
"beta1",
self._beta1,
"beta2",
self._beta2,
"epsilon",
self._epsilon,
)
else: else:
# create the adamax optimize op # create the adamax optimize op
adamax_op = block.append_op( adamax_op = block.append_op(
...@@ -271,7 +253,7 @@ class Adamax(Optimizer): ...@@ -271,7 +253,7 @@ class Adamax(Optimizer):
beta1_pow_acc, self._beta1, 0.0, True beta1_pow_acc, self._beta1, 0.0, True
) )
beta1_pow_acc.copy_(tmp, False) beta1_pow_acc.copy_(tmp, False)
continue else:
with param.block.program._optimized_guard( with param.block.program._optimized_guard(
[param, grad] [param, grad]
), name_scope('adamax'): ), name_scope('adamax'):
...@@ -301,8 +283,7 @@ class Adamax(Optimizer): ...@@ -301,8 +283,7 @@ class Adamax(Optimizer):
beta1_pow_acc, self._beta1, 0.0, True beta1_pow_acc, self._beta1, 0.0, True
) )
beta1_pow_acc.copy_(tmp, False) beta1_pow_acc.copy_(tmp, False)
continue else:
with param.block.program._optimized_guard( with param.block.program._optimized_guard(
[param, grad] [param, grad]
), name_scope('adamax'): ), name_scope('adamax'):
......
...@@ -18,7 +18,7 @@ from collections.abc import Callable ...@@ -18,7 +18,7 @@ from collections.abc import Callable
import paddle import paddle
from .. import _C_ops, _legacy_C_ops from .. import _C_ops
from ..fluid import core, framework, unique_name from ..fluid import core, framework, unique_name
from ..fluid.clip import GradientClipBase from ..fluid.clip import GradientClipBase
from ..fluid.dygraph import base as imperative_base from ..fluid.dygraph import base as imperative_base
...@@ -473,7 +473,7 @@ class AdamW(Optimizer): ...@@ -473,7 +473,7 @@ class AdamW(Optimizer):
lr = self._create_param_lr(param_and_grad) lr = self._create_param_lr(param_and_grad)
# create the adamw optimize op # create the adamw optimize op
if framework._non_static_mode(): if framework.in_dygraph_mode():
lr_ratio_ = ( lr_ratio_ = (
1.0 1.0
if self._lr_ratio is None if self._lr_ratio is None
...@@ -491,7 +491,6 @@ class AdamW(Optimizer): ...@@ -491,7 +491,6 @@ class AdamW(Optimizer):
else self._beta2.numpy().item(0) else self._beta2.numpy().item(0)
) )
if framework.in_dygraph_mode():
found_inf = self._get_auxiliary_var('found_inf') found_inf = self._get_auxiliary_var('found_inf')
_, _, _, _, _, _ = _C_ops.adamw_( _, _, _, _, _, _ = _C_ops.adamw_(
param_and_grad[0], param_and_grad[0],
...@@ -514,43 +513,8 @@ class AdamW(Optimizer): ...@@ -514,43 +513,8 @@ class AdamW(Optimizer):
find_master, find_master,
False, False,
) )
else:
_, _, _, _, _, _ = _legacy_C_ops.adamw(
param_and_grad[0],
param_and_grad[1],
lr,
moment1,
moment2,
beta1_pow_acc,
beta2_pow_acc,
master_weight,
param_and_grad[0],
moment1,
moment2,
beta1_pow_acc,
beta2_pow_acc,
master_weight,
'epsilon',
self._epsilon,
'lazy_mode',
self._lazy_mode,
'min_row_size_to_use_multithread',
1000,
'beta1',
_beta1,
'beta2',
_beta2,
"with_decay",
with_decay,
'coeff',
self._weight_decay,
'multi_precision',
find_master,
'lr_ratio',
lr_ratio_,
)
return None return None
else:
inputs = { inputs = {
"Param": [param_and_grad[0]], "Param": [param_and_grad[0]],
"Grad": [param_and_grad[1]], "Grad": [param_and_grad[1]],
......
...@@ -13,7 +13,7 @@ ...@@ -13,7 +13,7 @@
# limitations under the License. # limitations under the License.
import paddle import paddle
from paddle import _C_ops, _legacy_C_ops from paddle import _C_ops
from paddle.fluid.executor import global_scope from paddle.fluid.executor import global_scope
from ..fluid import core, framework, unique_name from ..fluid import core, framework, unique_name
...@@ -313,35 +313,7 @@ class Lamb(Optimizer): ...@@ -313,35 +313,7 @@ class Lamb(Optimizer):
find_master, find_master,
) )
return None return None
if framework._non_static_mode(): else:
_legacy_C_ops.lamb(
param_and_grad[0],
param_and_grad[1],
lr,
moment1,
moment2,
beta1_pow_acc,
beta2_pow_acc,
master_weight,
param_and_grad[0],
moment1,
moment2,
beta1_pow_acc,
beta2_pow_acc,
master_weight,
'beta1',
self._beta1,
'beta2',
self._beta2,
'epsilon',
self._epsilon,
'weight_decay',
weight_decay,
'multi_precision',
find_master,
)
return None
# create the lamb optimize op # create the lamb optimize op
inputs = { inputs = {
"Param": param_and_grad[0], "Param": param_and_grad[0],
......
...@@ -20,8 +20,6 @@ import numpy ...@@ -20,8 +20,6 @@ import numpy
import paddle.fluid.core as core import paddle.fluid.core as core
from paddle import Tensor from paddle import Tensor
from ..fluid.framework import _in_legacy_dygraph
__all__ = [ # noqa __all__ = [ # noqa
'LRScheduler', 'LRScheduler',
'NoamDecay', 'NoamDecay',
...@@ -1395,15 +1393,8 @@ class ReduceOnPlateau(LRScheduler): ...@@ -1395,15 +1393,8 @@ class ReduceOnPlateau(LRScheduler):
else: else:
self.last_epoch = epoch self.last_epoch = epoch
if not _in_legacy_dygraph():
tmp = core.eager.Tensor
else:
# need to declarate explicitly
from paddle.framework import VarBase as Tensor
tmp = Tensor
# loss must be float, numpy.ndarray or 1-D Tensor with shape [1] # loss must be float, numpy.ndarray or 1-D Tensor with shape [1]
if isinstance(metrics, (tmp, numpy.ndarray)): if isinstance(metrics, (core.eager.Tensor, numpy.ndarray)):
assert len(metrics.shape) == 1 and metrics.shape[0] == 1, ( assert len(metrics.shape) == 1 and metrics.shape[0] == 1, (
"the metrics.shape " "the metrics.shape "
"should be (1L,), but the current metrics.shape is {}. Maybe that " "should be (1L,), but the current metrics.shape is {}. Maybe that "
......
...@@ -15,8 +15,8 @@ ...@@ -15,8 +15,8 @@
import warnings import warnings
import paddle import paddle
from paddle import _C_ops, _legacy_C_ops from paddle import _C_ops
from paddle.fluid.framework import _in_legacy_dygraph, in_dygraph_mode from paddle.fluid.framework import in_dygraph_mode
from paddle.fluid.regularizer import L2DecayRegularizer from paddle.fluid.regularizer import L2DecayRegularizer
from ..fluid import core, framework, unique_name from ..fluid import core, framework, unique_name
...@@ -333,30 +333,6 @@ class Momentum(Optimizer): ...@@ -333,30 +333,6 @@ class Momentum(Optimizer):
else None else None
) )
if _in_legacy_dygraph():
if isinstance(param_and_grad, dict):
self._update_regularization(param_and_grad['weight_decay'])
_, _, _ = _legacy_C_ops.momentum(
param_and_grad[0],
param_and_grad[1],
velocity_acc,
lr,
master_weight,
param_and_grad[0],
velocity_acc,
master_weight,
'mu',
self._momentum,
'use_nesterov',
self._use_nesterov,
'regularization_method',
regularization_method,
'regularization_coeff',
regularization_coeff,
'multi_precision',
find_master,
)
return None
if in_dygraph_mode(): if in_dygraph_mode():
if isinstance(param_and_grad, dict): if isinstance(param_and_grad, dict):
self._update_regularization(param_and_grad['weight_decay']) self._update_regularization(param_and_grad['weight_decay'])
...@@ -373,7 +349,7 @@ class Momentum(Optimizer): ...@@ -373,7 +349,7 @@ class Momentum(Optimizer):
find_master, find_master,
self._rescale_grad, self._rescale_grad,
) )
else:
attrs = { attrs = {
"mu": self._momentum, "mu": self._momentum,
"use_nesterov": self._use_nesterov, "use_nesterov": self._use_nesterov,
...@@ -553,7 +529,6 @@ class Momentum(Optimizer): ...@@ -553,7 +529,6 @@ class Momentum(Optimizer):
else None else None
) )
if framework._non_static_mode():
if in_dygraph_mode(): if in_dygraph_mode():
_, _, _ = _C_ops.merged_momentum_( _, _, _ = _C_ops.merged_momentum_(
self._param_dict[key][param_group_idx], self._param_dict[key][param_group_idx],
...@@ -563,40 +538,11 @@ class Momentum(Optimizer): ...@@ -563,40 +538,11 @@ class Momentum(Optimizer):
master_weight, master_weight,
self._momentum, self._momentum,
self._use_nesterov, self._use_nesterov,
self._regularization_method_dict[key][ self._regularization_method_dict[key][param_group_idx],
param_group_idx self._regularization_coeff_dict[key][param_group_idx],
],
self._regularization_coeff_dict[key][
param_group_idx
],
find_master, find_master,
self._rescale_grad, self._rescale_grad,
) )
else:
_, _, _ = _legacy_C_ops.merged_momentum(
self._param_dict[key][param_group_idx],
grad_dict[key],
self._velocity_dict[key][param_group_idx],
lr_dict[key],
master_weight,
self._param_dict[key][param_group_idx],
self._velocity_dict[key][param_group_idx],
master_weight,
'mu',
self._momentum,
'use_nesterov',
self._use_nesterov,
'regularization_method',
self._regularization_method_dict[key][
param_group_idx
],
'regularization_coeff',
self._regularization_coeff_dict[key][
param_group_idx
],
'multi_precision',
find_master,
)
else: else:
inputs = { inputs = {
"Param": self._param_dict[key][param_group_idx], "Param": self._param_dict[key][param_group_idx],
......
...@@ -18,13 +18,12 @@ from collections import defaultdict ...@@ -18,13 +18,12 @@ from collections import defaultdict
import numpy as np import numpy as np
import paddle import paddle
from paddle import _C_ops, _legacy_C_ops from paddle import _C_ops
from paddle.fluid import core from paddle.fluid import core
from paddle.fluid.framework import ( from paddle.fluid.framework import (
Variable, Variable,
_current_expected_place, _current_expected_place,
_in_eager_without_dygraph_check, _in_eager_without_dygraph_check,
_in_legacy_dygraph,
default_main_program, default_main_program,
device_guard, device_guard,
in_dygraph_mode, in_dygraph_mode,
...@@ -534,17 +533,6 @@ class Optimizer: ...@@ -534,17 +533,6 @@ class Optimizer:
current_lr.dtype, current_lr.dtype,
place, place,
) )
elif _in_legacy_dygraph():
_legacy_C_ops.fill_constant(
current_lr,
'value',
float(value),
'dtype',
current_lr.dtype,
'shape',
list(current_lr.shape),
)
else: else:
global_block = framework.default_main_program().global_block() global_block = framework.default_main_program().global_block()
global_block.append_op( global_block.append_op(
...@@ -1042,10 +1030,9 @@ class Optimizer: ...@@ -1042,10 +1030,9 @@ class Optimizer:
if self._dtype is None: if self._dtype is None:
self._dtype = loss.dtype self._dtype = loss.dtype
if framework._non_static_mode(): if framework.in_dygraph_mode():
parameter_list = parameters if parameters else self._parameter_list parameter_list = parameters if parameters else self._parameter_list
if framework.in_dygraph_mode():
# It is very time-consuming to call c++ functions in a loop on the python side. # It is very time-consuming to call c++ functions in a loop on the python side.
# We put this part of the code on the c++ side to improve the speed in eager mode. # We put this part of the code on the c++ side to improve the speed in eager mode.
params_grads = [] params_grads = []
...@@ -1053,17 +1040,6 @@ class Optimizer: ...@@ -1053,17 +1040,6 @@ class Optimizer:
for index, grad in enumerate(grads): for index, grad in enumerate(grads):
if grad is not None: if grad is not None:
params_grads.append((parameter_list[index], grad)) params_grads.append((parameter_list[index], grad))
else:
# Keep the original code to support legacy mode.
# Delete the else branch when the legacy mode exits.
params_grads = []
for param in parameter_list:
if param.stop_gradient:
continue
if param._grad_ivar() is not None:
# create gradient tensor
grad_var = param._grad_ivar()
params_grads.append((param, grad_var))
else: else:
if callbacks is None: if callbacks is None:
callbacks = [error_clip_callback] callbacks = [error_clip_callback]
...@@ -1207,9 +1183,7 @@ class Optimizer: ...@@ -1207,9 +1183,7 @@ class Optimizer:
if framework.in_dygraph_mode(): if framework.in_dygraph_mode():
return _C_ops.add_n([grad, regularization_term]) return _C_ops.add_n([grad, regularization_term])
elif framework._in_legacy_dygraph(): else:
return _legacy_C_ops.sum([grad, regularization_term])
new_grad = grad new_grad = grad
if grad.type == core.VarDesc.VarType.SELECTED_ROWS: if grad.type == core.VarDesc.VarType.SELECTED_ROWS:
# FIXME(zcd): If the grad is SELECTED_ROWS, after regularization, # FIXME(zcd): If the grad is SELECTED_ROWS, after regularization,
......
...@@ -15,11 +15,11 @@ ...@@ -15,11 +15,11 @@
import warnings import warnings
import paddle import paddle
from paddle import _C_ops, _legacy_C_ops from paddle import _C_ops
from ..fluid import core, framework, unique_name from ..fluid import core, framework, unique_name
from ..fluid.dygraph import no_grad from ..fluid.dygraph import no_grad
from ..fluid.framework import _in_legacy_dygraph, in_dygraph_mode from ..fluid.framework import in_dygraph_mode
from ..fluid.layer_helper import LayerHelper from ..fluid.layer_helper import LayerHelper
from .optimizer import Optimizer from .optimizer import Optimizer
...@@ -166,17 +166,7 @@ class SGD(Optimizer): ...@@ -166,17 +166,7 @@ class SGD(Optimizer):
find_master, find_master,
) )
return None return None
if _in_legacy_dygraph(): else:
_legacy_C_ops.sgd(
param_and_grad[0],
lr,
param_and_grad[1],
master_weight,
param_and_grad[0],
master_weight,
)
return None
assert isinstance(block, framework.Block) assert isinstance(block, framework.Block)
# create the optimize op # create the optimize op
inputs = { inputs = {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请注册