Unverified commit f1072973, authored by 姜永久, committed by GitHub

rm legacy fluid part4 (#49281)

* rm legacy fluid part4

* rm non_static_mode

* minor change

* modify initializer

* rm legacy for initializer

* fix dataloader test
Parent 76f43f6d
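The common shape of every hunk below: the old three-way dispatch (`in_dygraph_mode()` / `_in_legacy_dygraph()` / static graph) collapses into a two-way dispatch, because the legacy dygraph mode no longer exists. A minimal, self-contained sketch of that shape (illustrative only, not Paddle source; `paddle.in_dynamic_mode()` is the public counterpart of the internal `in_dygraph_mode()` helper):

```python
import paddle

def squared_l2_norm_like(x):
    # Illustrative stand-in for the helpers edited below: one eager
    # branch, one static-graph branch, no legacy middle branch.
    if paddle.in_dynamic_mode():
        # eager path: runs the computation immediately on Tensors
        return paddle.sum(paddle.square(x))
    else:
        # static path: the same expression appends ops to the current
        # program instead of executing right away
        return paddle.sum(paddle.square(x))

print(squared_l2_norm_like(paddle.rand([4, 3])))  # eager mode by default
```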
@@ -23,7 +23,7 @@ from . import core
 from . import name_scope
 from .dygraph import base as imperative_base
 from .data_feeder import check_variable_and_dtype
-from .framework import _non_static_mode, in_dygraph_mode, _in_legacy_dygraph
+from .framework import in_dygraph_mode
 from .layer_helper import LayerHelper
 from .framework import default_main_program
 from paddle import _C_ops, _legacy_C_ops
@@ -78,18 +78,16 @@ def _squared_l2_norm(x):
     if in_dygraph_mode():
         return _C_ops.squared_l2_norm(x)
-    elif _in_legacy_dygraph():
-        return _legacy_C_ops.squared_l2_norm(x)
-
-    op_type = 'squared_l2_norm'
-    check_variable_and_dtype(x, 'x', ['float32', 'float64'], op_type)
-    helper = LayerHelper(op_type, **locals())
-    out = helper.create_variable_for_type_inference(x.dtype)
-
-    inputs = {"X": x}
-    outputs = {'Out': out}
-    helper.append_op(type=op_type, inputs=inputs, outputs=outputs)
-    return out
+    else:
+        op_type = 'squared_l2_norm'
+        check_variable_and_dtype(x, 'x', ['float32', 'float64'], op_type)
+        helper = LayerHelper(op_type, **locals())
+        out = helper.create_variable_for_type_inference(x.dtype)
+
+        inputs = {"X": x}
+        outputs = {'Out': out}
+        helper.append_op(type=op_type, inputs=inputs, outputs=outputs)
+        return out
 
 
 class BaseErrorClipAttr:
@@ -196,7 +194,7 @@ class ClipGradBase:
         raise NotImplementedError
 
     def __call__(self, params_grads):
-        if framework._non_static_mode():
+        if in_dygraph_mode():
            return self._dygraph_clip(params_grads)
         else:
             for p, g in params_grads:
...
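With the legacy branch gone, `ClipGradBase.__call__` dispatches on `in_dygraph_mode()` alone, so eager training goes straight to `_dygraph_clip`. A hedged usage sketch that exercises that path through the public API (the model and optimizer below are examples, not part of the diff):

```python
import paddle

model = paddle.nn.Linear(8, 2)
clip = paddle.nn.ClipGradByGlobalNorm(clip_norm=1.0)
opt = paddle.optimizer.SGD(
    learning_rate=0.1, parameters=model.parameters(), grad_clip=clip
)

loss = model(paddle.rand([4, 8])).mean()
loss.backward()
opt.step()        # clipping runs inside step() via the dygraph branch
opt.clear_grad()
```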
@@ -34,7 +34,7 @@ import paddle
 import paddle.profiler as profiler
 from paddle.profiler.utils import in_profiler_mode
 from .. import core, layers
-from ..framework import _non_static_mode, in_dygraph_mode, _in_legacy_dygraph
+from ..framework import in_dygraph_mode
 from ..multiprocess_utils import (
     _set_SIGCHLD_handler,
     MP_STATUS_CHECK_INTERVAL,
@@ -303,28 +303,23 @@ class _DataLoaderIterSingleProcess(_DataLoaderIterBase):
                 )
                 data = _restore_batch(data, self._structure_infos.pop(0))
             else:
-                if _in_legacy_dygraph():
-                    data = self._reader.read_next_var_list()
-                    data = _restore_batch(data, self._structure_infos.pop(0))
-                else:  # in static mode
-                    if self._return_list:
-                        data = self._reader.read_next_list()
-                        for i in range(len(data)):
-                            data[i] = data[i]._move_to_list()
-                        structs = [
-                            self._structure_infos.pop(0)
-                            for _ in range(len(self._places))
-                        ]
-                        data = [
-                            _restore_batch(d, s) for d, s in zip(data, structs)
-                        ]
-                        # static graph organized data on multi-device with list, if
-                        # place number is 1, there is only 1 device, extra the data
-                        # from list for devices to be compatible with dygraph mode
-                        if len(self._places) == 1:
-                            data = data[0]
-                    else:
-                        data = self._reader.read_next()
+                # in static mode
+                if self._return_list:
+                    data = self._reader.read_next_list()
+                    for i in range(len(data)):
+                        data[i] = data[i]._move_to_list()
+                    structs = [
+                        self._structure_infos.pop(0)
+                        for _ in range(len(self._places))
+                    ]
+                    data = [_restore_batch(d, s) for d, s in zip(data, structs)]
+                    # static graph organized data on multi-device with list, if
+                    # place number is 1, there is only 1 device, extra the data
+                    # from list for devices to be compatible with dygraph mode
+                    if len(self._places) == 1:
+                        data = data[0]
+                else:
+                    data = self._reader.read_next()
             benchmark().after_reader()
 
             return data
@@ -528,9 +523,7 @@ class _DataLoaderIterMultiProcess(_DataLoaderIterBase):
                     self._reader.read_next_list()[0]
                 )
             else:
-                if _in_legacy_dygraph():
-                    self._reader.read_next_var_list()
-                elif self._return_list:
+                if self._return_list:
                     self._reader.read_next_list()
                 else:
                     data = self._reader.read_next()
@@ -816,28 +809,22 @@ class _DataLoaderIterMultiProcess(_DataLoaderIterBase):
                 )
                 data = _restore_batch(data, self._structure_infos.pop(0))
             else:
-                if _in_legacy_dygraph():
-                    data = self._reader.read_next_var_list()
-                    data = _restore_batch(data, self._structure_infos.pop(0))
-                else:
-                    if self._return_list:
-                        data = self._reader.read_next_list()
-                        for i in range(len(data)):
-                            data[i] = data[i]._move_to_list()
-                        structs = [
-                            self._structure_infos.pop(0)
-                            for _ in range(len(self._places))
-                        ]
-                        data = [
-                            _restore_batch(d, s) for d, s in zip(data, structs)
-                        ]
-                        # static graph organized data on multi-device with list, if
-                        # place number is 1, there is only 1 device, extra the data
-                        # from list for devices to be compatible with dygraph mode
-                        if len(self._places) == 1:
-                            data = data[0]
-                    else:
-                        data = self._reader.read_next()
+                if self._return_list:
+                    data = self._reader.read_next_list()
+                    for i in range(len(data)):
+                        data[i] = data[i]._move_to_list()
+                    structs = [
+                        self._structure_infos.pop(0)
+                        for _ in range(len(self._places))
+                    ]
+                    data = [_restore_batch(d, s) for d, s in zip(data, structs)]
+                    # static graph organized data on multi-device with list, if
+                    # place number is 1, there is only 1 device, extra the data
+                    # from list for devices to be compatible with dygraph mode
+                    if len(self._places) == 1:
+                        data = data[0]
+                else:
+                    data = self._reader.read_next()
             self._on_output_batch()
         benchmark().after_reader()
         return data
...
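After this cleanup the iterators keep exactly two read paths: eager mode reads Tensor lists directly, static mode goes through `read_next_list()` plus `_restore_batch`; the `read_next_var_list()` branch is removed. A small public-API sketch of the eager path (the dataset contents are made up for illustration):

```python
import paddle
from paddle.io import DataLoader, TensorDataset

features = paddle.rand([16, 4])
labels = paddle.randint(0, 2, [16, 1])
loader = DataLoader(TensorDataset([features, labels]), batch_size=4)

for x, y in loader:   # eager mode: each batch arrives as a Tensor
    print(x.shape, y.shape)
```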
@@ -17,9 +17,7 @@ import functools
 from . import framework
 from . import core
 from .framework import (
-    _non_static_mode,
     in_dygraph_mode,
-    _in_legacy_dygraph,
     default_main_program,
     _current_expected_place,
 )
@@ -191,21 +189,6 @@ class ConstantInitializer(Initializer):
                 var, var.shape, str(float(self._value)), var.dtype, place
             )
             return None
-        elif _in_legacy_dygraph():
-            _legacy_C_ops.fill_constant(
-                var,
-                'value',
-                float(self._value),
-                'force_cpu',
-                self._force_cpu,
-                'dtype',
-                int(var.dtype),
-                'str_value',
-                str(float(self._value)),
-                'shape',
-                var.shape,
-            )
-            return None
         else:
             op = block.append_op(
                 type="fill_constant",
@@ -307,46 +290,17 @@ class UniformInitializer(Initializer):
         out_dtype = var.dtype
         out_var = var
 
-        if framework._non_static_mode():
-            if in_dygraph_mode():
-                out_var = _C_ops.uniform(
-                    var.shape,
-                    out_dtype,
-                    self._low,
-                    self._high,
-                    self._seed,
-                    _current_expected_place(),
-                )
-            elif _in_legacy_dygraph():
-                out_var = _legacy_C_ops.uniform_random(
-                    'shape',
-                    var.shape,
-                    'min',
-                    self._low,
-                    'max',
-                    self._high,
-                    'seed',
-                    self._seed,
-                    'dtype',
-                    out_dtype,
-                    'diag_num',
-                    self._diag_num,
-                    'diag_step',
-                    self._diag_step,
-                    'diag_val',
-                    self._diag_val,
-                )
+        if in_dygraph_mode():
+            out_var = _C_ops.uniform(
+                var.shape,
+                out_dtype,
+                self._low,
+                self._high,
+                self._seed,
+                _current_expected_place(),
+            )
             if var.dtype == VarDesc.VarType.FP16:
-                if in_dygraph_mode():
-                    var_tmp = _C_ops.cast(out_var, var.dtype)
-                elif _in_legacy_dygraph():
-                    var_tmp = _legacy_C_ops.cast(
-                        out_var,
-                        'in_dtype',
-                        out_var.dtype,
-                        'out_dtype',
-                        var.dtype,
-                    )
+                var_tmp = _C_ops.cast(out_var, var.dtype)
                 var_tmp._share_underline_tensor_to(var)
             else:
                 out_var._share_underline_tensor_to(var)
@@ -446,24 +400,6 @@ class NormalInitializer(Initializer):
             out_var._share_underline_tensor_to(var)
             return None
 
-        if _in_legacy_dygraph():
-            out_var = _legacy_C_ops.gaussian_random(
-                'shape',
-                var.shape,
-                'dtype',
-                var.dtype,
-                'mean',
-                self._mean,
-                'std',
-                self._std_dev,
-                'seed',
-                self._seed,
-                'use_mkldnn',
-                False,
-            )
-
-            out_var._share_underline_tensor_to(var)
-            return None
         else:
             op = block.append_op(
                 type="gaussian_random",
@@ -559,27 +495,6 @@ class TruncatedNormalInitializer(Initializer):
             out_var._share_underline_tensor_to(var)
             return None
 
-        if _in_legacy_dygraph():
-            out_var = _legacy_C_ops.truncated_gaussian_random(
-                'shape',
-                var.shape,
-                'dtype',
-                out_dtype,
-                'mean',
-                self._mean,
-                'std',
-                self._std_dev,
-                'seed',
-                self._seed,
-            )
-            if var.dtype in [VarDesc.VarType.FP16, VarDesc.VarType.BF16]:
-                var_tmp = _legacy_C_ops.cast(
-                    out_var, 'in_dtype', out_var.dtype, 'out_dtype', var.dtype
-                )
-                var_tmp._share_underline_tensor_to(var)
-            else:
-                out_var._share_underline_tensor_to(var)
-            return None
         else:
             op = block.append_op(
                 type="truncated_gaussian_random",
@@ -707,66 +622,29 @@ class XavierInitializer(Initializer):
         out_dtype = var.dtype
         out_var = var
 
-        if framework._non_static_mode():
+        if in_dygraph_mode():
             if self._uniform:
                 limit = math.sqrt(6.0 / float(fan_in + fan_out))
-                if in_dygraph_mode():
-                    out_var = _C_ops.uniform(
-                        out_var.shape,
-                        out_dtype,
-                        -limit,
-                        limit,
-                        self._seed,
-                        _current_expected_place(),
-                    )
-                elif _in_legacy_dygraph():
-                    out_var = _legacy_C_ops.uniform_random(
-                        'shape',
-                        out_var.shape,
-                        'min',
-                        -limit,
-                        'max',
-                        limit,
-                        'seed',
-                        self._seed,
-                        'dtype',
-                        out_dtype,
-                    )
+                out_var = _C_ops.uniform(
+                    out_var.shape,
+                    out_dtype,
+                    -limit,
+                    limit,
+                    self._seed,
+                    _current_expected_place(),
+                )
             else:
                 std = math.sqrt(2.0 / float(fan_in + fan_out))
-                if in_dygraph_mode():
-                    place = _current_expected_place()
-                    out_var = _C_ops.gaussian(
-                        out_var.shape, 0.0, std, self._seed, out_dtype, place
-                    )
-                else:
-                    out_var = _legacy_C_ops.gaussian_random(
-                        'shape',
-                        out_var.shape,
-                        'dtype',
-                        out_dtype,
-                        'mean',
-                        0.0,
-                        'std',
-                        std,
-                        'seed',
-                        self._seed,
-                    )
+                place = _current_expected_place()
+                out_var = _C_ops.gaussian(
+                    out_var.shape, 0.0, std, self._seed, out_dtype, place
+                )
 
             if var.dtype == VarDesc.VarType.FP16 or (
                 var.dtype == VarDesc.VarType.BF16 and not self._uniform
             ):
-                if in_dygraph_mode():
-                    var_tmp = _C_ops.cast(out_var, var.dtype)
-                elif _in_legacy_dygraph():
-                    var_tmp = _legacy_C_ops.cast(
-                        out_var,
-                        'in_dtype',
-                        out_var.dtype,
-                        'out_dtype',
-                        var.dtype,
-                    )
+                var_tmp = _C_ops.cast(out_var, var.dtype)
                 var_tmp._share_underline_tensor_to(var)
             else:
                 out_var._share_underline_tensor_to(var)
@@ -918,67 +796,30 @@ class MSRAInitializer(Initializer):
         out_dtype = var.dtype
         out_var = var
 
-        if framework._non_static_mode():
+        if in_dygraph_mode():
             if self._uniform:
                 gain = calculate_gain(self._nonlinearity, self._negative_slope)
                 limit = gain * math.sqrt(3.0 / float(fan_in))
-                if in_dygraph_mode():
-                    out_var = _C_ops.uniform(
-                        var.shape,
-                        out_dtype,
-                        -limit,
-                        limit,
-                        self._seed,
-                        _current_expected_place(),
-                    )
-                else:
-                    out_var = _legacy_C_ops.uniform_random(
-                        'shape',
-                        out_var.shape,
-                        'min',
-                        -limit,
-                        'max',
-                        limit,
-                        'seed',
-                        self._seed,
-                        'dtype',
-                        int(out_dtype),
-                    )
+                out_var = _C_ops.uniform(
+                    var.shape,
+                    out_dtype,
+                    -limit,
+                    limit,
+                    self._seed,
+                    _current_expected_place(),
+                )
             else:
                 gain = calculate_gain(self._nonlinearity, self._negative_slope)
                 std = gain / math.sqrt(float(fan_in))
-                if in_dygraph_mode():
-                    place = _current_expected_place()
-                    out_var = _C_ops.gaussian(
-                        out_var.shape, 0.0, std, self._seed, out_dtype, place
-                    )
-                else:
-                    out_var = _legacy_C_ops.gaussian_random(
-                        'shape',
-                        out_var.shape,
-                        'dtype',
-                        int(out_dtype),
-                        'mean',
-                        0.0,
-                        'std',
-                        std,
-                        'seed',
-                        self._seed,
-                    )
+                place = _current_expected_place()
+                out_var = _C_ops.gaussian(
+                    out_var.shape, 0.0, std, self._seed, out_dtype, place
+                )
 
             if var.dtype == VarDesc.VarType.FP16 or (
                 var.dtype == VarDesc.VarType.BF16 and not self._uniform
             ):
-                if in_dygraph_mode():
-                    var_tmp = _C_ops.cast(out_var, var.dtype)
-                elif _in_legacy_dygraph():
-                    var_tmp = _legacy_C_ops.cast(
-                        out_var,
-                        'in_dtype',
-                        out_var.dtype,
-                        'out_dtype',
-                        var.dtype,
-                    )
+                var_tmp = _C_ops.cast(out_var, var.dtype)
                 var_tmp._share_underline_tensor_to(var)
             else:
                 out_var._share_underline_tensor_to(var)
@@ -1145,40 +986,20 @@ class BilinearInitializer(Initializer):
         if np.prod(shape) > 1024 * 1024:
             raise ValueError("The size of input is too big. ")
 
-        if framework._non_static_mode():
-            if in_dygraph_mode():
-                _C_ops.assign_value_(
-                    out_var,
-                    list(shape),
-                    out_dtype,
-                    values,
-                    _current_expected_place(),
-                )
-            elif _in_legacy_dygraph():
-                _legacy_C_ops.assign_value(
-                    out_var,
-                    'shape',
-                    list(shape),
-                    'dtype',
-                    out_dtype,
-                    value_name,
-                    values,
-                )
+        if in_dygraph_mode():
+            _C_ops.assign_value_(
+                out_var,
+                list(shape),
+                out_dtype,
+                values,
+                _current_expected_place(),
+            )
             if var.dtype in [
                 VarDesc.VarType.FP16,
                 VarDesc.VarType.BF16,
                 VarDesc.VarType.FP64,
             ]:
-                if in_dygraph_mode():
-                    var_tmp = _C_ops.cast(out_var, var.dtype)
-                elif _in_legacy_dygraph():
-                    var_tmp = _legacy_C_ops.cast(
-                        out_var,
-                        'in_dtype',
-                        out_var.dtype,
-                        'out_dtype',
-                        var.dtype,
-                    )
+                var_tmp = _C_ops.cast(out_var, var.dtype)
                 var_tmp._share_underline_tensor_to(var)
             else:
                 out_var._share_underline_tensor_to(var)
@@ -1285,36 +1106,16 @@ class NumpyArrayInitializer(Initializer):
                 "saving it to file and 'load_op' to load it"
             )
 
-        if framework._non_static_mode():
-            if in_dygraph_mode():
-                _C_ops.assign_value_(
-                    out_var,
-                    list(self._value.shape),
-                    out_dtype,
-                    values,
-                    _current_expected_place(),
-                )
-            elif _in_legacy_dygraph():
-                _legacy_C_ops.assign_value(
-                    out_var,
-                    'shape',
-                    list(self._value.shape),
-                    'dtype',
-                    out_dtype,
-                    value_name,
-                    values,
-                )
+        if in_dygraph_mode():
+            _C_ops.assign_value_(
+                out_var,
+                list(self._value.shape),
+                out_dtype,
+                values,
+                _current_expected_place(),
+            )
             if var.dtype in [VarDesc.VarType.FP16, VarDesc.VarType.BF16]:
-                if in_dygraph_mode():
-                    var_tmp = _C_ops.cast(out_var, var.dtype)
-                elif _in_legacy_dygraph():
-                    var_tmp = _legacy_C_ops.cast(
-                        out_var,
-                        'in_dtype',
-                        out_var.dtype,
-                        'out_dtype',
-                        var.dtype,
-                    )
+                var_tmp = _C_ops.cast(out_var, var.dtype)
                 var_tmp._share_underline_tensor_to(var)
             else:
                 out_var._share_underline_tensor_to(var)
...
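Every initializer now follows the same two branches: in eager mode call the new `_C_ops` kernel (plus a cast when the parameter is FP16/BF16), otherwise append the corresponding op to the block. A hedged sketch of how that eager path is normally reached through the public API (the layer shapes and values are illustrative):

```python
import paddle

w_attr = paddle.ParamAttr(
    initializer=paddle.nn.initializer.Uniform(low=-0.1, high=0.1)
)
b_attr = paddle.ParamAttr(
    initializer=paddle.nn.initializer.Constant(value=0.0)
)

# Creating the layer in eager mode triggers the in_dygraph_mode() branch
# of the initializers edited above.
layer = paddle.nn.Linear(4, 3, weight_attr=w_attr, bias_attr=b_attr)
print(layer.weight.numpy().min(), layer.weight.numpy().max())
```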
@@ -62,7 +62,6 @@ from .wrapped_decorator import signature_safe_contextmanager
 import warnings
 from paddle import _C_ops, _legacy_C_ops
 from ..fluid.framework import (
-    _in_legacy_dygraph,
     in_dygraph_mode,
     _current_expected_place,
 )
@@ -130,7 +129,7 @@ class Optimizer:
             list(parameter_list) if parameter_list is not None else None
         )
         self._name = name
-        if framework._non_static_mode():
+        if in_dygraph_mode():
             if not isinstance(
                 learning_rate, (float, LearningRateDecay, LRScheduler)
             ):
@@ -532,17 +531,6 @@ class Optimizer:
                         current_lr.dtype,
                         place,
                     )
-                elif _in_legacy_dygraph():
-                    _legacy_C_ops.fill_constant(
-                        current_lr,
-                        'value',
-                        float(value),
-                        'dtype',
-                        current_lr.dtype,
-                        'shape',
-                        list(current_lr.shape),
-                    )
                 else:
                     global_block = (
                         framework.default_main_program().global_block()
@@ -703,7 +691,7 @@ class Optimizer:
             name in self._accumulators
             and param.name in self._accumulators[name]
         ):
-            if framework._non_static_mode():
+            if in_dygraph_mode():
                 return self._accumulators[name][param.name]
             raise Exception(
                 "Accumulator {} already exists for parameter {}".format(
@@ -723,7 +711,7 @@ class Optimizer:
             persistable=True,
             dtype=dtype or param.dtype,
             type=core.VarDesc.VarType.LOD_TENSOR
-            if framework._non_static_mode()
+            if in_dygraph_mode()
             else (param.type if type is None else type),
             shape=shape,
             belong_to_optimizer=True,
@@ -735,7 +723,7 @@ class Optimizer:
                 var, initializer=Constant(value=float(fill_value))
             )
 
-        if framework._non_static_mode():
+        if in_dygraph_mode():
             if len(self._accumulators_holder) > 0:
                 assert (
                     var_name in self._accumulators_holder
@@ -770,7 +758,7 @@ class Optimizer:
         if self._name is not None:
             name = self._name + "_" + name
         if name in self._global_accumulators:
-            if framework._non_static_mode():
+            if in_dygraph_mode():
                 return self._global_accumulators[name]
             raise Exception("Global accumulator {} already exists".format(name))
         if shape is None:
@@ -796,7 +784,7 @@ class Optimizer:
             var, initializer=Constant(value=float(fill_value))
         )
 
-        if framework._non_static_mode():
+        if in_dygraph_mode():
             if len(self._accumulators_holder) > 0:
                 assert (
                     var_name in self._accumulators_holder
@@ -911,7 +899,7 @@ class Optimizer:
         )
         self._create_global_learning_rate()
 
-        if framework._non_static_mode():
+        if in_dygraph_mode():
             for param_and_grad in parameters_and_grads:
                 if param_and_grad[1] is None:
                     continue
@@ -1018,7 +1006,7 @@ class Optimizer:
         See examples in ``apply_gradients``.
         """
         act_no_grad_set = None
-        if framework._non_static_mode():
+        if in_dygraph_mode():
             pass
         else:
             act_no_grad_set = self._get_no_grad_set(loss, no_grad_set)
@@ -1027,7 +1015,7 @@ class Optimizer:
         if self._dtype is None:
             self._dtype = loss.dtype
 
-        if framework._non_static_mode():
+        if in_dygraph_mode():
             parameter_list = (
                 parameter_list if parameter_list else self._parameter_list
             )
@@ -1084,7 +1072,7 @@ class Optimizer:
         assert regularization_term is not None
 
-        if framework._non_static_mode():
+        if in_dygraph_mode():
             return _legacy_C_ops.sum([grad, regularization_term])
 
         new_grad = grad
@@ -1131,7 +1119,7 @@ class Optimizer:
             Exception: Unknown regularization type
         """
         params_and_grads = []
-        if framework._non_static_mode():
+        if in_dygraph_mode():
             for param, grad in parameters_and_grads:
                 new_grad = self._create_regularization_of_grad(
                     param, grad, regularization
@@ -1302,7 +1290,7 @@ class Optimizer:
         Returns:
             list: A list of operators appended to the current program.
         """
-        if framework._non_static_mode():
+        if in_dygraph_mode():
             with program_guard(
                 framework.default_main_program(),
                 framework.default_startup_program(),
@@ -1562,42 +1550,32 @@ class SGDOptimizer(Optimizer):
                 find_master,
             )
             return None
-        if _in_legacy_dygraph():
-            _legacy_C_ops.sgd(
-                param_and_grad[0],
-                lr,
-                param_and_grad[1],
-                master_weight,
-                param_and_grad[0],
-                master_weight,
-            )
-            return None
-
-        assert isinstance(block, framework.Block)
-        # create the optimize op
-        inputs = {
-            "Param": param_and_grad[0],
-            "Grad": param_and_grad[1],
-            "LearningRate": lr,
-        }
-
-        outputs = {"ParamOut": param_and_grad[0]}
-
-        attrs = {"multi_precision": find_master}
-
-        if find_master:
-            inputs["MasterParam"] = master_weight
-            outputs["MasterParamOut"] = master_weight
-
-        sgd_op = block.append_op(
-            type=self.type,
-            inputs=inputs,
-            outputs=outputs,
-            attrs=attrs,
-            stop_gradient=True,
-        )
-
-        return sgd_op
+        else:
+            assert isinstance(block, framework.Block)
+            # create the optimize op
+            inputs = {
+                "Param": param_and_grad[0],
+                "Grad": param_and_grad[1],
+                "LearningRate": lr,
+            }
+
+            outputs = {"ParamOut": param_and_grad[0]}
+
+            attrs = {"multi_precision": find_master}
+
+            if find_master:
+                inputs["MasterParam"] = master_weight
+                outputs["MasterParamOut"] = master_weight
+
+            sgd_op = block.append_op(
+                type=self.type,
+                inputs=inputs,
+                outputs=outputs,
+                attrs=attrs,
+                stop_gradient=True,
+            )
+
+            return sgd_op
 
 
 class MomentumOptimizer(Optimizer):
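For reference, the surviving static-graph branch of `_append_optimize_op` builds an operator on the block rather than calling a kernel. A self-contained sketch (public APIs only; the program below is illustrative, not the PR's code) that shows the appended `sgd` op:

```python
import paddle

paddle.enable_static()
main, startup = paddle.static.Program(), paddle.static.Program()
with paddle.static.program_guard(main, startup):
    x = paddle.static.data(name="x", shape=[None, 4], dtype="float32")
    loss = paddle.mean(paddle.static.nn.fc(x, size=1))
    paddle.optimizer.SGD(learning_rate=0.1).minimize(loss)

# minimize() appends the optimizer ops to the block; the list includes 'sgd'
print([op.type for op in main.global_block().ops])
paddle.disable_static()
```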
@@ -1710,7 +1688,7 @@ class MomentumOptimizer(Optimizer):
         )
         lr = self._create_param_lr(param_and_grad)
         master_weight = None
-        if framework._non_static_mode():
+        if in_dygraph_mode():
             _, _, _ = _legacy_C_ops.momentum(
                 param_and_grad[0],
                 param_and_grad[1],
@@ -1726,29 +1704,29 @@ class MomentumOptimizer(Optimizer):
                 self._use_nesterov,
             )
             return None
-
-        attrs = {"mu": self._momentum, "use_nesterov": self._use_nesterov}
-        inputs = {
-            "Param": [param_and_grad[0]],
-            "Grad": [param_and_grad[1]],
-            "Velocity": [velocity_acc],
-            "LearningRate": [lr],
-        }
-
-        outputs = {
-            "ParamOut": [param_and_grad[0]],
-            "VelocityOut": [velocity_acc],
-        }
-        # create the momentum optimize op
-        momentum_op = block.append_op(
-            type=self.type,
-            inputs=inputs,
-            outputs=outputs,
-            attrs=attrs,
-            stop_gradient=True,
-        )
-
-        return momentum_op
+        else:
+            attrs = {"mu": self._momentum, "use_nesterov": self._use_nesterov}
+            inputs = {
+                "Param": [param_and_grad[0]],
+                "Grad": [param_and_grad[1]],
+                "Velocity": [velocity_acc],
+                "LearningRate": [lr],
+            }
+
+            outputs = {
+                "ParamOut": [param_and_grad[0]],
+                "VelocityOut": [velocity_acc],
+            }
+            # create the momentum optimize op
+            momentum_op = block.append_op(
+                type=self.type,
+                inputs=inputs,
+                outputs=outputs,
+                attrs=attrs,
+                stop_gradient=True,
+            )
+
+            return momentum_op
 
 
 class LarsMomentumOptimizer(Optimizer):
@@ -1974,7 +1952,7 @@ class LarsMomentumOptimizer(Optimizer):
             inputs["MasterParam"] = master_weight
             outputs["MasterParamOut"] = master_weight
 
-        if framework._non_static_mode():
+        if in_dygraph_mode():
             tmp, tmp2 = _legacy_C_ops.lars_momentum(
                 [param_and_grad[0]],
                 [param_and_grad[1]],
@@ -2123,18 +2101,6 @@ class AdagradOptimizer(Optimizer):
                 self._epsilon,
             )
             return None
-        elif _in_legacy_dygraph():
-            _legacy_C_ops.adagrad(
-                param_and_grad[0],
-                param_and_grad[1],
-                moment_acc,
-                self._create_param_lr(param_and_grad),
-                param_and_grad[0],
-                moment_acc,
-                "epsilon",
-                self._epsilon,
-            )
-            return None
         else:
             # Create the adagrad optimizer op
             adagrad_op = block.append_op(
@@ -2430,7 +2396,7 @@ class AdamOptimizer(Optimizer):
         lr = self._create_param_lr(param_and_grad)
 
         # create the adam optimize op
-        if framework._non_static_mode():
+        if in_dygraph_mode():
             _beta1 = (
                 self._beta1
                 if not isinstance(self._beta1, Variable)
@@ -2721,7 +2687,7 @@ class AdamaxOptimizer(Optimizer):
             self._beta1_pow_acc_str, param_and_grad[0]
         )
 
-        if framework.in_dygraph_mode():
+        if in_dygraph_mode():
             _C_ops.adamax_(
                 param_and_grad[0],
                 param_and_grad[1],
@@ -2733,24 +2699,6 @@ class AdamaxOptimizer(Optimizer):
                 self._beta2,
                 self._epsilon,
             )
-        elif framework._in_legacy_dygraph():
-            _legacy_C_ops.adamax(
-                param_and_grad[0],
-                param_and_grad[1],
-                self._create_param_lr(param_and_grad),
-                moment,
-                inf_norm,
-                beta1_pow_acc,
-                param_and_grad[0],
-                moment,
-                inf_norm,
-                "beta1",
-                self._beta1,
-                "beta2",
-                self._beta2,
-                "epsilon",
-                self._epsilon,
-            )
         else:
             # create the adamax optimize op
             adamax_op = block.append_op(
@@ -2790,15 +2738,8 @@ class AdamaxOptimizer(Optimizer):
                 beta1_pow_acc = self._get_accumulator(
                     self._beta1_pow_acc_str, param
                 )
-                if framework._non_static_mode():
-                    if framework.in_dygraph_mode():
-                        tmp = _C_ops.scale(
-                            beta1_pow_acc, self._beta1, 0.0, True
-                        )
-                    else:
-                        tmp = _legacy_C_ops.scale(
-                            beta1_pow_acc, "scale", self._beta1
-                        )
+                if in_dygraph_mode():
+                    tmp = _C_ops.scale(beta1_pow_acc, self._beta1, 0.0, True)
                     beta1_pow_acc.copy_(tmp, False)
                 else:
                     block.append_op(
@@ -2891,7 +2832,7 @@ class DpsgdOptimizer(Optimizer):
         if self._seed is None:
             self._seed = 0
 
-        if framework._non_static_mode():
+        if in_dygraph_mode():
             _legacy_C_ops.dpsgd(
                 param_and_grad[0],
                 param_and_grad[1],
@@ -3023,7 +2964,7 @@ class DecayedAdagradOptimizer(Optimizer):
             self._moment_acc_str, param_and_grad[0]
         )
 
-        if framework._non_static_mode():
+        if in_dygraph_mode():
             _legacy_C_ops.decayed_adagrad(
                 param_and_grad[0],
                 param_and_grad[1],
@@ -3160,7 +3101,7 @@ class AdadeltaOptimizer(Optimizer):
             self._avg_squared_update_acc_str, param_and_grad[0]
         )
 
-        if framework.in_dygraph_mode():
+        if in_dygraph_mode():
             _C_ops.adadelta_(
                 param_and_grad[0],
                 param_and_grad[1],
@@ -3169,20 +3110,6 @@ class AdadeltaOptimizer(Optimizer):
                 self._rho,
                 self._epsilon,
             )
-        elif framework._in_legacy_dygraph():
-            _legacy_C_ops.adadelta(
-                param_and_grad[0],
-                param_and_grad[1],
-                avg_squared_grad_acc,
-                avg_squared_update_acc,
-                param_and_grad[0],
-                avg_squared_grad_acc,
-                avg_squared_update_acc,
-                "epsilon",
-                self._epsilon,
-                "rho",
-                self._rho,
-            )
         else:
             # Create the adadelta optimizer op
             adadelta_op = block.append_op(
@@ -3387,27 +3314,6 @@ class RMSPropOptimizer(Optimizer):
                 self._centered,
             )
             return None
-        elif _in_legacy_dygraph():
-            _legacy_C_ops.rmsprop(
-                param_and_grad[0],
-                mean_square_acc,
-                self._create_param_lr(param_and_grad),
-                param_and_grad[1],
-                momentum_acc,
-                param_and_grad[0],
-                momentum_acc,
-                mean_square_acc,
-                mean_grad_acc,
-                "epsilon",
-                self._epsilon,
-                "decay",
-                self._rho,
-                "momentum",
-                self._momentum,
-                "centered",
-                self._centered,
-            )
-            return None
         else:
             rmsprop_op = block.append_op(
                 type=self.type,
@@ -3581,7 +3487,7 @@ class FtrlOptimizer(Optimizer):
         linear_acc = self._get_accumulator(
             self._linear_acc_str, param_and_grad[0]
         )
-        if framework._non_static_mode():
+        if in_dygraph_mode():
             _legacy_C_ops.ftrl(
                 param_and_grad[0],
                 squared_acc,
@@ -3763,7 +3669,7 @@ class LambOptimizer(AdamOptimizer):
             weight_decay = self._weight_decay
         lr = self._create_param_lr(param_and_grad)
         master_weight = None
-        if framework._non_static_mode():
+        if in_dygraph_mode():
             _legacy_C_ops.lamb(
                 param_and_grad[0],
                 param_and_grad[1],
@@ -3940,7 +3846,7 @@ class ModelAverage(Optimizer):
         regularization=None,
         name=None,
     ):
-        if framework._non_static_mode():
+        if in_dygraph_mode():
             raise Exception("In dygraph, don't support ModelAverage.")
         super().__init__(0.0, regularization=regularization, name=name)
         self.average_window = average_window_rate
@@ -4269,7 +4175,7 @@ class ExponentialMovingAverage:
     """
 
     def __init__(self, decay=0.999, thres_steps=None, name=None):
-        if framework._non_static_mode():
+        if in_dygraph_mode():
             raise Exception(
                 "In dygraph, don't support ExponentialMovingAverage."
             )
@@ -4494,7 +4400,7 @@ class PipelineOptimizer:
             self._device = "npu"
         elif core.is_compiled_with_cuda():
             self._device = "gpu"
-        if framework._non_static_mode():
+        if in_dygraph_mode():
             raise Exception("In dygraph, don't support PipelineOptimizer.")
         valid_optimizers = (
             Optimizer,
@@ -6451,7 +6357,7 @@ class RecomputeOptimizer(Optimizer):
     """
 
     def __init__(self, optimizer):
-        if framework._non_static_mode():
+        if in_dygraph_mode():
             raise Exception("In dygraph, don't support RecomputeOptimizer.")
         self._optimizer = optimizer
         self._checkpoints = None
@@ -7066,7 +6972,7 @@ class RecomputeOptimizer(Optimizer):
             self._checkpoints is not None
         ), "You should call _set_checkpoints first"
 
-        if framework._non_static_mode():
+        if in_dygraph_mode():
             raise NotImplementedError(
                 "DyGraph current does not support recompute"
             )
@@ -7164,7 +7070,7 @@ class RecomputeOptimizer(Optimizer):
         assert (
             self._checkpoints is not None
         ), "You should call _set_checkpoints first"
-        if framework._non_static_mode():
+        if in_dygraph_mode():
            raise NotImplementedError(
                "DyGraph current does not support recompute"
            )
@@ -7248,7 +7154,7 @@ class LookaheadOptimizer:
 
     def __init__(self, inner_optimizer, alpha=0.5, k=5):
 
-        if framework._non_static_mode():
+        if in_dygraph_mode():
             raise Exception("In dygraph, don't support LookaheadOptimizer.")
         assert inner_optimizer is not None, "inner optimizer can not be None"
         assert (
@@ -7427,7 +7333,7 @@ class GradientMergeOptimizer:
     GRAD_MERGE_COND_NAME = "grad_merge_cond_name"
 
     def __init__(self, inner_optimizer, k_steps=1, avg=True):
-        if framework._non_static_mode():
+        if in_dygraph_mode():
             raise Exception(
                 "In dygraph, we don't support GradientMergeOptimizer."
                 "You can do Gradient merge by yourself with k-times forward + backward, "
...
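All of the remaining `in_dygraph_mode()` branches above are reached by ordinary eager training. A minimal sketch (the model and data are placeholders, not part of the PR) that drives the accumulator creation and the momentum kernel path:

```python
import paddle

model = paddle.nn.Linear(4, 1)
opt = paddle.optimizer.Momentum(
    learning_rate=0.01, momentum=0.9, parameters=model.parameters()
)

for _ in range(3):
    loss = model(paddle.rand([8, 4])).mean()
    loss.backward()
    opt.step()        # eager branch: the C++ momentum kernel runs directly
    opt.clear_grad()
```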
@@ -347,6 +347,7 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase):
         )
 
     def test_with_dataloader(self):
+        paddle.disable_static()
         for device in self.devices:
             paddle.set_device(device)
             # data loader
@@ -377,6 +378,7 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase):
                 if batch_id == 5:
                     break
+        paddle.enable_static()
 
 
 if __name__ == '__main__':
...
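The test fix simply brackets the dataloader check with an explicit mode switch, so it no longer relies on the removed legacy-dygraph read path. A hedged sketch of the same shape (the dataset and assertion are placeholders, not the test's real body):

```python
import paddle

def run_dataloader_check():
    paddle.disable_static()   # run the dataloader part in eager mode
    try:
        dataset = paddle.io.TensorDataset([paddle.rand([8, 2])])
        for (batch,) in paddle.io.DataLoader(dataset, batch_size=4):
            assert isinstance(batch, paddle.Tensor)
    finally:
        paddle.enable_static()  # restore the mode other tests expect

run_dataloader_check()
```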