Unverified commit 6c9fa665 authored by wanghuancoder, committed by GitHub

delete legacy dygraph code in python/paddle/optimizer (#49308)

Parent 983ae1d7
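This commit strips the `_legacy_C_ops` / `_in_legacy_dygraph()` branches from the optimizers, so each `_append_optimize_op` now chooses only between the eager dygraph path (`in_dygraph_mode()` calling `_C_ops` kernels) and the static-graph `append_op` path. A minimal sketch of the resulting two-way dispatch, using hypothetical stand-ins (`run_eager_kernel`, `append_static_op`) rather than Paddle source:

```python
# Illustrative stand-ins only, not Paddle internals.
def in_dygraph_mode() -> bool:
    # stand-in for paddle.fluid.framework.in_dygraph_mode()
    return True

def run_eager_kernel(param, grad):
    print("eager path: call the _C_ops kernel directly", param, grad)

def append_static_op(param, grad):
    print("static path: block.append_op(...)", param, grad)

def _append_optimize_op(param, grad):
    # After this commit only two branches remain; the middle
    # _in_legacy_dygraph() / _legacy_C_ops branch is gone.
    if in_dygraph_mode():
        run_eager_kernel(param, grad)
        return None
    append_static_op(param, grad)

_append_optimize_op("w", "dw")
```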
......@@ -170,7 +170,7 @@ class Adadelta(Optimizer):
self._epsilon,
)
return None
else:
if not isinstance(block, framework.Block):
raise TypeError("block is not instance of framework.Block.")
......
......@@ -16,7 +16,7 @@ import warnings
from collections import defaultdict
import paddle
from paddle import _C_ops, _legacy_C_ops
from paddle import _C_ops
from ..fluid import core, framework, unique_name
from ..fluid.dygraph import base as imperative_base
......@@ -393,50 +393,7 @@ class Adam(Optimizer):
)
return None
if framework._in_legacy_dygraph():
_beta1 = (
self._beta1
if not isinstance(self._beta1, Variable)
else self._beta1.numpy().item(0)
)
_beta2 = (
self._beta2
if not isinstance(self._beta2, Variable)
else self._beta2.numpy().item(0)
)
_, _, _, _, _, _ = _legacy_C_ops.adam(
param_and_grad[0],
param_and_grad[1],
lr,
moment1,
moment2,
beta1_pow_acc,
beta2_pow_acc,
master_weight,
param_and_grad[0],
moment1,
moment2,
beta1_pow_acc,
beta2_pow_acc,
master_weight,
'epsilon',
self._epsilon,
'lazy_mode',
self._lazy_mode,
'min_row_size_to_use_multithread',
1000,
'beta1',
_beta1,
'beta2',
_beta2,
'multi_precision',
find_master,
)
return None
else:
inputs = {
"Param": [param_and_grad[0]],
"Grad": [param_and_grad[1]],
......@@ -729,15 +686,13 @@ class Adam(Optimizer):
else self._beta2.numpy().item(0)
)
if framework._non_static_mode():
if framework.in_dygraph_mode():
master_weight = self._master_weight_dict[key]
master_weight = (
master_weight[param_group_idx]
if master_weight is not None
else None
)
if in_dygraph_mode():
_, _, _, _, _, _ = _C_ops.merged_adam_(
self._param_dict[key][param_group_idx],
grad_dict[key],
......@@ -753,31 +708,6 @@ class Adam(Optimizer):
find_master,
False,
)
else:
_, _, _, _, _, _ = _legacy_C_ops.merged_adam(
self._param_dict[key][param_group_idx],
grad_dict[key],
lr_dict[key],
self._moment1_dict[key][param_group_idx],
self._moment2_dict[key][param_group_idx],
self._beta1_pow_acc_dict[key][param_group_idx],
self._beta2_pow_acc_dict[key][param_group_idx],
master_weight,
self._param_dict[key][param_group_idx],
self._moment1_dict[key][param_group_idx],
self._moment2_dict[key][param_group_idx],
self._beta1_pow_acc_dict[key][param_group_idx],
self._beta2_pow_acc_dict[key][param_group_idx],
master_weight,
'epsilon',
self._epsilon,
'beta1',
_beta1,
'beta2',
_beta2,
'multi_precision',
find_master,
)
else:
inputs = {
"Param": self._param_dict[key][param_group_idx],
......
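A hedged usage sketch (not part of the diff): the public `paddle.optimizer.Adam` API is unchanged by this cleanup, and an eager-mode step now always goes through the `_C_ops.adam_` / `_C_ops.merged_adam_` kernels kept above. Assumes a Paddle 2.x install where eager mode is the default:

```python
import paddle

linear = paddle.nn.Linear(4, 2)
opt = paddle.optimizer.Adam(learning_rate=1e-3, parameters=linear.parameters())

x = paddle.rand([8, 4])
loss = paddle.mean(linear(x))
loss.backward()
opt.step()        # eager mode: dispatches straight to the _C_ops kernel
opt.clear_grad()
```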
......@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle import _C_ops, _legacy_C_ops
from paddle import _C_ops
from ..fluid import framework
from ..fluid.dygraph import no_grad
......@@ -210,24 +210,6 @@ class Adamax(Optimizer):
self._beta2,
self._epsilon,
)
elif framework._in_legacy_dygraph():
_legacy_C_ops.adamax(
param_and_grad[0],
param_and_grad[1],
self._create_param_lr(param_and_grad),
moment,
inf_norm,
beta1_pow_acc,
param_and_grad[0],
moment,
inf_norm,
"beta1",
self._beta1,
"beta2",
self._beta2,
"epsilon",
self._epsilon,
)
else:
# create the adamax optimize op
adamax_op = block.append_op(
......@@ -271,7 +253,7 @@ class Adamax(Optimizer):
beta1_pow_acc, self._beta1, 0.0, True
)
beta1_pow_acc.copy_(tmp, False)
continue
else:
with param.block.program._optimized_guard(
[param, grad]
), name_scope('adamax'):
......@@ -301,8 +283,7 @@ class Adamax(Optimizer):
beta1_pow_acc, self._beta1, 0.0, True
)
beta1_pow_acc.copy_(tmp, False)
continue
else:
with param.block.program._optimized_guard(
[param, grad]
), name_scope('adamax'):
......
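For reference, a plain-Python sketch of what the retained `_C_ops.scale_(beta1_pow_acc, self._beta1, 0.0, True)` call in `_finish_update` computes: an in-place `acc = acc * beta1 + 0.0`, i.e. the running power of beta1. The bias-correction formula below is an assumption used only to illustrate why the accumulator is kept per step:

```python
beta1 = 0.9
beta1_pow_acc = 1.0
lr = 0.001
for step in range(1, 4):
    beta1_pow_acc = beta1_pow_acc * beta1 + 0.0  # what scale_ does in place
    corrected_lr = lr / (1.0 - beta1_pow_acc)    # assumed Adamax bias correction
    print(step, round(beta1_pow_acc, 4), round(corrected_lr, 6))
```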
......@@ -18,7 +18,7 @@ from collections.abc import Callable
import paddle
from .. import _C_ops, _legacy_C_ops
from .. import _C_ops
from ..fluid import core, framework, unique_name
from ..fluid.clip import GradientClipBase
from ..fluid.dygraph import base as imperative_base
......@@ -473,7 +473,7 @@ class AdamW(Optimizer):
lr = self._create_param_lr(param_and_grad)
# create the adamw optimize op
if framework._non_static_mode():
if framework.in_dygraph_mode():
lr_ratio_ = (
1.0
if self._lr_ratio is None
......@@ -491,7 +491,6 @@ class AdamW(Optimizer):
else self._beta2.numpy().item(0)
)
if framework.in_dygraph_mode():
found_inf = self._get_auxiliary_var('found_inf')
_, _, _, _, _, _ = _C_ops.adamw_(
param_and_grad[0],
......@@ -514,43 +513,8 @@ class AdamW(Optimizer):
find_master,
False,
)
else:
_, _, _, _, _, _ = _legacy_C_ops.adamw(
param_and_grad[0],
param_and_grad[1],
lr,
moment1,
moment2,
beta1_pow_acc,
beta2_pow_acc,
master_weight,
param_and_grad[0],
moment1,
moment2,
beta1_pow_acc,
beta2_pow_acc,
master_weight,
'epsilon',
self._epsilon,
'lazy_mode',
self._lazy_mode,
'min_row_size_to_use_multithread',
1000,
'beta1',
_beta1,
'beta2',
_beta2,
"with_decay",
with_decay,
'coeff',
self._weight_decay,
'multi_precision',
find_master,
'lr_ratio',
lr_ratio_,
)
return None
else:
inputs = {
"Param": [param_and_grad[0]],
"Grad": [param_and_grad[1]],
......
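A hedged usage sketch: `paddle.optimizer.AdamW` keeps its public signature; after this change an eager-mode step dispatches on `in_dygraph_mode()` alone and reaches `_C_ops.adamw_` with the decoupled weight-decay coefficient shown above:

```python
import paddle

layer = paddle.nn.Linear(4, 2)
opt = paddle.optimizer.AdamW(
    learning_rate=1e-3,
    parameters=layer.parameters(),
    weight_decay=0.01,  # passed to the adamw_ kernel as the decay coeff
)
loss = paddle.mean(layer(paddle.rand([2, 4])))
loss.backward()
opt.step()
opt.clear_grad()
```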
......@@ -13,7 +13,7 @@
# limitations under the License.
import paddle
from paddle import _C_ops, _legacy_C_ops
from paddle import _C_ops
from paddle.fluid.executor import global_scope
from ..fluid import core, framework, unique_name
......@@ -313,35 +313,7 @@ class Lamb(Optimizer):
find_master,
)
return None
if framework._non_static_mode():
_legacy_C_ops.lamb(
param_and_grad[0],
param_and_grad[1],
lr,
moment1,
moment2,
beta1_pow_acc,
beta2_pow_acc,
master_weight,
param_and_grad[0],
moment1,
moment2,
beta1_pow_acc,
beta2_pow_acc,
master_weight,
'beta1',
self._beta1,
'beta2',
self._beta2,
'epsilon',
self._epsilon,
'weight_decay',
weight_decay,
'multi_precision',
find_master,
)
return None
else:
# create the lamb optimize op
inputs = {
"Param": param_and_grad[0],
......
......@@ -20,8 +20,6 @@ import numpy
import paddle.fluid.core as core
from paddle import Tensor
from ..fluid.framework import _in_legacy_dygraph
__all__ = [ # noqa
'LRScheduler',
'NoamDecay',
......@@ -1395,15 +1393,8 @@ class ReduceOnPlateau(LRScheduler):
else:
self.last_epoch = epoch
if not _in_legacy_dygraph():
tmp = core.eager.Tensor
else:
# need to declarate explicitly
from paddle.framework import VarBase as Tensor
tmp = Tensor
# loss must be float, numpy.ndarray or 1-D Tensor with shape [1]
if isinstance(metrics, (tmp, numpy.ndarray)):
if isinstance(metrics, (core.eager.Tensor, numpy.ndarray)):
assert len(metrics.shape) == 1 and metrics.shape[0] == 1, (
"the metrics.shape "
"should be (1L,), but the current metrics.shape is {}. Maybe that "
......
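The `ReduceOnPlateau.step()` contract referenced above is unchanged: `metrics` must be a Python float, a `numpy.ndarray`, or a 1-D Tensor with shape `[1]`; only the legacy `VarBase` fallback is gone, so the isinstance check now accepts `core.eager.Tensor` directly. A hedged usage sketch:

```python
import paddle

scheduler = paddle.optimizer.lr.ReduceOnPlateau(
    learning_rate=0.1, factor=0.5, patience=2, verbose=True
)
for epoch in range(5):
    val_loss = paddle.to_tensor([1.0 / (epoch + 1)])  # shape [1] eager Tensor
    scheduler.step(val_loss)  # a plain float such as 0.25 is also accepted
```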
......@@ -15,8 +15,8 @@
import warnings
import paddle
from paddle import _C_ops, _legacy_C_ops
from paddle.fluid.framework import _in_legacy_dygraph, in_dygraph_mode
from paddle import _C_ops
from paddle.fluid.framework import in_dygraph_mode
from paddle.fluid.regularizer import L2DecayRegularizer
from ..fluid import core, framework, unique_name
......@@ -333,30 +333,6 @@ class Momentum(Optimizer):
else None
)
if _in_legacy_dygraph():
if isinstance(param_and_grad, dict):
self._update_regularization(param_and_grad['weight_decay'])
_, _, _ = _legacy_C_ops.momentum(
param_and_grad[0],
param_and_grad[1],
velocity_acc,
lr,
master_weight,
param_and_grad[0],
velocity_acc,
master_weight,
'mu',
self._momentum,
'use_nesterov',
self._use_nesterov,
'regularization_method',
regularization_method,
'regularization_coeff',
regularization_coeff,
'multi_precision',
find_master,
)
return None
if in_dygraph_mode():
if isinstance(param_and_grad, dict):
self._update_regularization(param_and_grad['weight_decay'])
......@@ -373,7 +349,7 @@ class Momentum(Optimizer):
find_master,
self._rescale_grad,
)
else:
attrs = {
"mu": self._momentum,
"use_nesterov": self._use_nesterov,
......@@ -553,7 +529,6 @@ class Momentum(Optimizer):
else None
)
if framework._non_static_mode():
if in_dygraph_mode():
_, _, _ = _C_ops.merged_momentum_(
self._param_dict[key][param_group_idx],
......@@ -563,40 +538,11 @@ class Momentum(Optimizer):
master_weight,
self._momentum,
self._use_nesterov,
self._regularization_method_dict[key][
param_group_idx
],
self._regularization_coeff_dict[key][
param_group_idx
],
self._regularization_method_dict[key][param_group_idx],
self._regularization_coeff_dict[key][param_group_idx],
find_master,
self._rescale_grad,
)
else:
_, _, _ = _legacy_C_ops.merged_momentum(
self._param_dict[key][param_group_idx],
grad_dict[key],
self._velocity_dict[key][param_group_idx],
lr_dict[key],
master_weight,
self._param_dict[key][param_group_idx],
self._velocity_dict[key][param_group_idx],
master_weight,
'mu',
self._momentum,
'use_nesterov',
self._use_nesterov,
'regularization_method',
self._regularization_method_dict[key][
param_group_idx
],
'regularization_coeff',
self._regularization_coeff_dict[key][
param_group_idx
],
'multi_precision',
find_master,
)
else:
inputs = {
"Param": self._param_dict[key][param_group_idx],
......
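A hedged sketch of the multi-tensor path kept above: with `use_multi_tensor=True` (assumed available in this Paddle version), `Momentum` groups parameters and calls `_C_ops.merged_momentum_` once per group instead of one `momentum_` call per parameter; the `_legacy_C_ops.merged_momentum` fallback no longer exists:

```python
import paddle

model = paddle.nn.Sequential(paddle.nn.Linear(4, 4), paddle.nn.Linear(4, 2))
opt = paddle.optimizer.Momentum(
    learning_rate=0.01,
    momentum=0.9,
    parameters=model.parameters(),
    use_multi_tensor=True,  # drop this flag for the per-parameter kernel
)
loss = paddle.mean(model(paddle.rand([8, 4])))
loss.backward()
opt.step()
opt.clear_grad()
```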
......@@ -18,13 +18,12 @@ from collections import defaultdict
import numpy as np
import paddle
from paddle import _C_ops, _legacy_C_ops
from paddle import _C_ops
from paddle.fluid import core
from paddle.fluid.framework import (
Variable,
_current_expected_place,
_in_eager_without_dygraph_check,
_in_legacy_dygraph,
default_main_program,
device_guard,
in_dygraph_mode,
......@@ -534,17 +533,6 @@ class Optimizer:
current_lr.dtype,
place,
)
elif _in_legacy_dygraph():
_legacy_C_ops.fill_constant(
current_lr,
'value',
float(value),
'dtype',
current_lr.dtype,
'shape',
list(current_lr.shape),
)
else:
global_block = framework.default_main_program().global_block()
global_block.append_op(
......@@ -1042,10 +1030,9 @@ class Optimizer:
if self._dtype is None:
self._dtype = loss.dtype
if framework._non_static_mode():
if framework.in_dygraph_mode():
parameter_list = parameters if parameters else self._parameter_list
if framework.in_dygraph_mode():
# It is very time-consuming to call c++ functions in a loop on the python side.
# We put this part of the code on the c++ side to improve the speed in eager mode.
params_grads = []
......@@ -1053,17 +1040,6 @@ class Optimizer:
for index, grad in enumerate(grads):
if grad is not None:
params_grads.append((parameter_list[index], grad))
else:
# Keep the original code to support legacy mode.
# Delete the else branch when the legacy mode exits.
params_grads = []
for param in parameter_list:
if param.stop_gradient:
continue
if param._grad_ivar() is not None:
# create gradient tensor
grad_var = param._grad_ivar()
params_grads.append((param, grad_var))
else:
if callbacks is None:
callbacks = [error_clip_callback]
......@@ -1207,9 +1183,7 @@ class Optimizer:
if framework.in_dygraph_mode():
return _C_ops.add_n([grad, regularization_term])
elif framework._in_legacy_dygraph():
return _legacy_C_ops.sum([grad, regularization_term])
else:
new_grad = grad
if grad.type == core.VarDesc.VarType.SELECTED_ROWS:
# FIXME(zcd): If the grad is SELECTED_ROWS, after regularization,
......
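A hedged usage sketch for the regularization change above: in eager mode the L2 penalty is now always folded into the gradient with `_C_ops.add_n([grad, regularization_term])`, the `_legacy_C_ops.sum` fallback having been removed. Public behaviour is unchanged:

```python
import paddle

linear = paddle.nn.Linear(4, 2)
opt = paddle.optimizer.SGD(
    learning_rate=0.1,
    parameters=linear.parameters(),
    weight_decay=paddle.regularizer.L2Decay(1e-4),  # merged into grad via add_n
)
loss = paddle.mean(linear(paddle.rand([2, 4])))
loss.backward()
opt.step()
opt.clear_grad()
```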
......@@ -15,11 +15,11 @@
import warnings
import paddle
from paddle import _C_ops, _legacy_C_ops
from paddle import _C_ops
from ..fluid import core, framework, unique_name
from ..fluid.dygraph import no_grad
from ..fluid.framework import _in_legacy_dygraph, in_dygraph_mode
from ..fluid.framework import in_dygraph_mode
from ..fluid.layer_helper import LayerHelper
from .optimizer import Optimizer
......@@ -166,17 +166,7 @@ class SGD(Optimizer):
find_master,
)
return None
if _in_legacy_dygraph():
_legacy_C_ops.sgd(
param_and_grad[0],
lr,
param_and_grad[1],
master_weight,
param_and_grad[0],
master_weight,
)
return None
else:
assert isinstance(block, framework.Block)
# create the optimize op
inputs = {
......
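Finally, a hedged sketch confirming that the static-graph branch above is untouched: under `paddle.enable_static()` the SGD optimizer still builds its op through `block.append_op`, exactly as before this commit:

```python
import paddle

paddle.enable_static()
main_prog, startup_prog = paddle.static.Program(), paddle.static.Program()
with paddle.static.program_guard(main_prog, startup_prog):
    x = paddle.static.data(name="x", shape=[None, 4], dtype="float32")
    loss = paddle.mean(paddle.static.nn.fc(x, size=1))
    sgd = paddle.optimizer.SGD(learning_rate=0.01)
    sgd.minimize(loss)  # static path: appends the sgd op to the block
paddle.disable_static()
```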