From f1072973a0fb0fc1c10e009bfca59412ffdec62d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=A7=9C=E6=B0=B8=E4=B9=85?= <34344716+yjjiang11@users.noreply.github.com> Date: Wed, 28 Dec 2022 10:14:04 +0800 Subject: [PATCH] rm legacy fluid part4 (#49281) * rm legacy fluid part4 * rm non_static_mode * minor change * modify initializer * rm legacy for initializer * fix dataloader test --- python/paddle/fluid/clip.py | 24 +- .../fluid/dataloader/dataloader_iter.py | 81 ++--- python/paddle/fluid/initializer.py | 311 ++++-------------- python/paddle/fluid/optimizer.py | 240 ++++---------- .../test_custom_relu_op_xpu_setup.py | 2 + 5 files changed, 176 insertions(+), 482 deletions(-) diff --git a/python/paddle/fluid/clip.py b/python/paddle/fluid/clip.py index d753b66fd7a..ffaa84ed3e5 100644 --- a/python/paddle/fluid/clip.py +++ b/python/paddle/fluid/clip.py @@ -23,7 +23,7 @@ from . import core from . import name_scope from .dygraph import base as imperative_base from .data_feeder import check_variable_and_dtype -from .framework import _non_static_mode, in_dygraph_mode, _in_legacy_dygraph +from .framework import in_dygraph_mode from .layer_helper import LayerHelper from .framework import default_main_program from paddle import _C_ops, _legacy_C_ops @@ -78,18 +78,16 @@ def _squared_l2_norm(x): if in_dygraph_mode(): return _C_ops.squared_l2_norm(x) - elif _in_legacy_dygraph(): - return _legacy_C_ops.squared_l2_norm(x) - - op_type = 'squared_l2_norm' - check_variable_and_dtype(x, 'x', ['float32', 'float64'], op_type) - helper = LayerHelper(op_type, **locals()) - out = helper.create_variable_for_type_inference(x.dtype) + else: + op_type = 'squared_l2_norm' + check_variable_and_dtype(x, 'x', ['float32', 'float64'], op_type) + helper = LayerHelper(op_type, **locals()) + out = helper.create_variable_for_type_inference(x.dtype) - inputs = {"X": x} - outputs = {'Out': out} - helper.append_op(type=op_type, inputs=inputs, outputs=outputs) - return out + inputs = {"X": x} + outputs = {'Out': out} + helper.append_op(type=op_type, inputs=inputs, outputs=outputs) + return out class BaseErrorClipAttr: @@ -196,7 +194,7 @@ class ClipGradBase: raise NotImplementedError def __call__(self, params_grads): - if framework._non_static_mode(): + if in_dygraph_mode(): return self._dygraph_clip(params_grads) else: for p, g in params_grads: diff --git a/python/paddle/fluid/dataloader/dataloader_iter.py b/python/paddle/fluid/dataloader/dataloader_iter.py index 620bd513781..8687b696bbd 100644 --- a/python/paddle/fluid/dataloader/dataloader_iter.py +++ b/python/paddle/fluid/dataloader/dataloader_iter.py @@ -34,7 +34,7 @@ import paddle import paddle.profiler as profiler from paddle.profiler.utils import in_profiler_mode from .. 
import core, layers -from ..framework import _non_static_mode, in_dygraph_mode, _in_legacy_dygraph +from ..framework import in_dygraph_mode from ..multiprocess_utils import ( _set_SIGCHLD_handler, MP_STATUS_CHECK_INTERVAL, @@ -303,28 +303,23 @@ class _DataLoaderIterSingleProcess(_DataLoaderIterBase): ) data = _restore_batch(data, self._structure_infos.pop(0)) else: - if _in_legacy_dygraph(): - data = self._reader.read_next_var_list() - data = _restore_batch(data, self._structure_infos.pop(0)) - else: # in static mode - if self._return_list: - data = self._reader.read_next_list() - for i in range(len(data)): - data[i] = data[i]._move_to_list() - structs = [ - self._structure_infos.pop(0) - for _ in range(len(self._places)) - ] - data = [ - _restore_batch(d, s) for d, s in zip(data, structs) - ] - # static graph organized data on multi-device with list, if - # place number is 1, there is only 1 device, extra the data - # from list for devices to be compatible with dygraph mode - if len(self._places) == 1: - data = data[0] - else: - data = self._reader.read_next() + # in static mode + if self._return_list: + data = self._reader.read_next_list() + for i in range(len(data)): + data[i] = data[i]._move_to_list() + structs = [ + self._structure_infos.pop(0) + for _ in range(len(self._places)) + ] + data = [_restore_batch(d, s) for d, s in zip(data, structs)] + # static graph organized data on multi-device with list, if + # place number is 1, there is only 1 device, extra the data + # from list for devices to be compatible with dygraph mode + if len(self._places) == 1: + data = data[0] + else: + data = self._reader.read_next() benchmark().after_reader() return data @@ -528,9 +523,7 @@ class _DataLoaderIterMultiProcess(_DataLoaderIterBase): self._reader.read_next_list()[0] ) else: - if _in_legacy_dygraph(): - self._reader.read_next_var_list() - elif self._return_list: + if self._return_list: self._reader.read_next_list() else: data = self._reader.read_next() @@ -816,28 +809,22 @@ class _DataLoaderIterMultiProcess(_DataLoaderIterBase): ) data = _restore_batch(data, self._structure_infos.pop(0)) else: - if _in_legacy_dygraph(): - data = self._reader.read_next_var_list() - data = _restore_batch(data, self._structure_infos.pop(0)) + if self._return_list: + data = self._reader.read_next_list() + for i in range(len(data)): + data[i] = data[i]._move_to_list() + structs = [ + self._structure_infos.pop(0) + for _ in range(len(self._places)) + ] + data = [_restore_batch(d, s) for d, s in zip(data, structs)] + # static graph organized data on multi-device with list, if + # place number is 1, there is only 1 device, extra the data + # from list for devices to be compatible with dygraph mode + if len(self._places) == 1: + data = data[0] else: - if self._return_list: - data = self._reader.read_next_list() - for i in range(len(data)): - data[i] = data[i]._move_to_list() - structs = [ - self._structure_infos.pop(0) - for _ in range(len(self._places)) - ] - data = [ - _restore_batch(d, s) for d, s in zip(data, structs) - ] - # static graph organized data on multi-device with list, if - # place number is 1, there is only 1 device, extra the data - # from list for devices to be compatible with dygraph mode - if len(self._places) == 1: - data = data[0] - else: - data = self._reader.read_next() + data = self._reader.read_next() self._on_output_batch() benchmark().after_reader() return data diff --git a/python/paddle/fluid/initializer.py b/python/paddle/fluid/initializer.py index b0c573936b8..849e52d074e 100644 --- 
a/python/paddle/fluid/initializer.py +++ b/python/paddle/fluid/initializer.py @@ -17,9 +17,7 @@ import functools from . import framework from . import core from .framework import ( - _non_static_mode, in_dygraph_mode, - _in_legacy_dygraph, default_main_program, _current_expected_place, ) @@ -191,21 +189,6 @@ class ConstantInitializer(Initializer): var, var.shape, str(float(self._value)), var.dtype, place ) return None - elif _in_legacy_dygraph(): - _legacy_C_ops.fill_constant( - var, - 'value', - float(self._value), - 'force_cpu', - self._force_cpu, - 'dtype', - int(var.dtype), - 'str_value', - str(float(self._value)), - 'shape', - var.shape, - ) - return None else: op = block.append_op( type="fill_constant", @@ -307,46 +290,17 @@ class UniformInitializer(Initializer): out_dtype = var.dtype out_var = var - if framework._non_static_mode(): - if in_dygraph_mode(): - out_var = _C_ops.uniform( - var.shape, - out_dtype, - self._low, - self._high, - self._seed, - _current_expected_place(), - ) - elif _in_legacy_dygraph(): - out_var = _legacy_C_ops.uniform_random( - 'shape', - var.shape, - 'min', - self._low, - 'max', - self._high, - 'seed', - self._seed, - 'dtype', - out_dtype, - 'diag_num', - self._diag_num, - 'diag_step', - self._diag_step, - 'diag_val', - self._diag_val, - ) + if in_dygraph_mode(): + out_var = _C_ops.uniform( + var.shape, + out_dtype, + self._low, + self._high, + self._seed, + _current_expected_place(), + ) if var.dtype == VarDesc.VarType.FP16: - if in_dygraph_mode(): - var_tmp = _C_ops.cast(out_var, var.dtype) - elif _in_legacy_dygraph(): - var_tmp = _legacy_C_ops.cast( - out_var, - 'in_dtype', - out_var.dtype, - 'out_dtype', - var.dtype, - ) + var_tmp = _C_ops.cast(out_var, var.dtype) var_tmp._share_underline_tensor_to(var) else: out_var._share_underline_tensor_to(var) @@ -446,24 +400,6 @@ class NormalInitializer(Initializer): out_var._share_underline_tensor_to(var) return None - if _in_legacy_dygraph(): - out_var = _legacy_C_ops.gaussian_random( - 'shape', - var.shape, - 'dtype', - var.dtype, - 'mean', - self._mean, - 'std', - self._std_dev, - 'seed', - self._seed, - 'use_mkldnn', - False, - ) - - out_var._share_underline_tensor_to(var) - return None else: op = block.append_op( type="gaussian_random", @@ -559,27 +495,6 @@ class TruncatedNormalInitializer(Initializer): out_var._share_underline_tensor_to(var) return None - if _in_legacy_dygraph(): - out_var = _legacy_C_ops.truncated_gaussian_random( - 'shape', - var.shape, - 'dtype', - out_dtype, - 'mean', - self._mean, - 'std', - self._std_dev, - 'seed', - self._seed, - ) - if var.dtype in [VarDesc.VarType.FP16, VarDesc.VarType.BF16]: - var_tmp = _legacy_C_ops.cast( - out_var, 'in_dtype', out_var.dtype, 'out_dtype', var.dtype - ) - var_tmp._share_underline_tensor_to(var) - else: - out_var._share_underline_tensor_to(var) - return None else: op = block.append_op( type="truncated_gaussian_random", @@ -707,66 +622,29 @@ class XavierInitializer(Initializer): out_dtype = var.dtype out_var = var - if framework._non_static_mode(): + if in_dygraph_mode(): if self._uniform: limit = math.sqrt(6.0 / float(fan_in + fan_out)) - if in_dygraph_mode(): - out_var = _C_ops.uniform( - out_var.shape, - out_dtype, - -limit, - limit, - self._seed, - _current_expected_place(), - ) - elif _in_legacy_dygraph(): - out_var = _legacy_C_ops.uniform_random( - 'shape', - out_var.shape, - 'min', - -limit, - 'max', - limit, - 'seed', - self._seed, - 'dtype', - out_dtype, - ) + out_var = _C_ops.uniform( + out_var.shape, + out_dtype, + -limit, + limit, + 
self._seed, + _current_expected_place(), + ) else: std = math.sqrt(2.0 / float(fan_in + fan_out)) - if in_dygraph_mode(): - place = _current_expected_place() - out_var = _C_ops.gaussian( - out_var.shape, 0.0, std, self._seed, out_dtype, place - ) - else: - out_var = _legacy_C_ops.gaussian_random( - 'shape', - out_var.shape, - 'dtype', - out_dtype, - 'mean', - 0.0, - 'std', - std, - 'seed', - self._seed, - ) + place = _current_expected_place() + out_var = _C_ops.gaussian( + out_var.shape, 0.0, std, self._seed, out_dtype, place + ) if var.dtype == VarDesc.VarType.FP16 or ( var.dtype == VarDesc.VarType.BF16 and not self._uniform ): - if in_dygraph_mode(): - var_tmp = _C_ops.cast(out_var, var.dtype) - elif _in_legacy_dygraph(): - var_tmp = _legacy_C_ops.cast( - out_var, - 'in_dtype', - out_var.dtype, - 'out_dtype', - var.dtype, - ) + var_tmp = _C_ops.cast(out_var, var.dtype) var_tmp._share_underline_tensor_to(var) else: out_var._share_underline_tensor_to(var) @@ -918,67 +796,30 @@ class MSRAInitializer(Initializer): out_dtype = var.dtype out_var = var - if framework._non_static_mode(): + if in_dygraph_mode(): if self._uniform: gain = calculate_gain(self._nonlinearity, self._negative_slope) limit = gain * math.sqrt(3.0 / float(fan_in)) - if in_dygraph_mode(): - out_var = _C_ops.uniform( - var.shape, - out_dtype, - -limit, - limit, - self._seed, - _current_expected_place(), - ) - else: - out_var = _legacy_C_ops.uniform_random( - 'shape', - out_var.shape, - 'min', - -limit, - 'max', - limit, - 'seed', - self._seed, - 'dtype', - int(out_dtype), - ) + out_var = _C_ops.uniform( + var.shape, + out_dtype, + -limit, + limit, + self._seed, + _current_expected_place(), + ) else: gain = calculate_gain(self._nonlinearity, self._negative_slope) std = gain / math.sqrt(float(fan_in)) - if in_dygraph_mode(): - place = _current_expected_place() - out_var = _C_ops.gaussian( - out_var.shape, 0.0, std, self._seed, out_dtype, place - ) - else: - out_var = _legacy_C_ops.gaussian_random( - 'shape', - out_var.shape, - 'dtype', - int(out_dtype), - 'mean', - 0.0, - 'std', - std, - 'seed', - self._seed, - ) + place = _current_expected_place() + out_var = _C_ops.gaussian( + out_var.shape, 0.0, std, self._seed, out_dtype, place + ) if var.dtype == VarDesc.VarType.FP16 or ( var.dtype == VarDesc.VarType.BF16 and not self._uniform ): - if in_dygraph_mode(): - var_tmp = _C_ops.cast(out_var, var.dtype) - elif _in_legacy_dygraph(): - var_tmp = _legacy_C_ops.cast( - out_var, - 'in_dtype', - out_var.dtype, - 'out_dtype', - var.dtype, - ) + var_tmp = _C_ops.cast(out_var, var.dtype) var_tmp._share_underline_tensor_to(var) else: out_var._share_underline_tensor_to(var) @@ -1145,40 +986,20 @@ class BilinearInitializer(Initializer): if np.prod(shape) > 1024 * 1024: raise ValueError("The size of input is too big. 
") - if framework._non_static_mode(): - if in_dygraph_mode(): - _C_ops.assign_value_( - out_var, - list(shape), - out_dtype, - values, - _current_expected_place(), - ) - elif _in_legacy_dygraph(): - _legacy_C_ops.assign_value( - out_var, - 'shape', - list(shape), - 'dtype', - out_dtype, - value_name, - values, - ) + if in_dygraph_mode(): + _C_ops.assign_value_( + out_var, + list(shape), + out_dtype, + values, + _current_expected_place(), + ) if var.dtype in [ VarDesc.VarType.FP16, VarDesc.VarType.BF16, VarDesc.VarType.FP64, ]: - if in_dygraph_mode(): - var_tmp = _C_ops.cast(out_var, var.dtype) - elif _in_legacy_dygraph(): - var_tmp = _legacy_C_ops.cast( - out_var, - 'in_dtype', - out_var.dtype, - 'out_dtype', - var.dtype, - ) + var_tmp = _C_ops.cast(out_var, var.dtype) var_tmp._share_underline_tensor_to(var) else: out_var._share_underline_tensor_to(var) @@ -1285,36 +1106,16 @@ class NumpyArrayInitializer(Initializer): "saving it to file and 'load_op' to load it" ) - if framework._non_static_mode(): - if in_dygraph_mode(): - _C_ops.assign_value_( - out_var, - list(self._value.shape), - out_dtype, - values, - _current_expected_place(), - ) - elif _in_legacy_dygraph(): - _legacy_C_ops.assign_value( - out_var, - 'shape', - list(self._value.shape), - 'dtype', - out_dtype, - value_name, - values, - ) + if in_dygraph_mode(): + _C_ops.assign_value_( + out_var, + list(self._value.shape), + out_dtype, + values, + _current_expected_place(), + ) if var.dtype in [VarDesc.VarType.FP16, VarDesc.VarType.BF16]: - if in_dygraph_mode(): - var_tmp = _C_ops.cast(out_var, var.dtype) - elif _in_legacy_dygraph(): - var_tmp = _legacy_C_ops.cast( - out_var, - 'in_dtype', - out_var.dtype, - 'out_dtype', - var.dtype, - ) + var_tmp = _C_ops.cast(out_var, var.dtype) var_tmp._share_underline_tensor_to(var) else: out_var._share_underline_tensor_to(var) diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py index af085a357e6..4528ea12771 100755 --- a/python/paddle/fluid/optimizer.py +++ b/python/paddle/fluid/optimizer.py @@ -62,7 +62,6 @@ from .wrapped_decorator import signature_safe_contextmanager import warnings from paddle import _C_ops, _legacy_C_ops from ..fluid.framework import ( - _in_legacy_dygraph, in_dygraph_mode, _current_expected_place, ) @@ -130,7 +129,7 @@ class Optimizer: list(parameter_list) if parameter_list is not None else None ) self._name = name - if framework._non_static_mode(): + if in_dygraph_mode(): if not isinstance( learning_rate, (float, LearningRateDecay, LRScheduler) ): @@ -532,17 +531,6 @@ class Optimizer: current_lr.dtype, place, ) - - elif _in_legacy_dygraph(): - _legacy_C_ops.fill_constant( - current_lr, - 'value', - float(value), - 'dtype', - current_lr.dtype, - 'shape', - list(current_lr.shape), - ) else: global_block = ( framework.default_main_program().global_block() @@ -703,7 +691,7 @@ class Optimizer: name in self._accumulators and param.name in self._accumulators[name] ): - if framework._non_static_mode(): + if in_dygraph_mode(): return self._accumulators[name][param.name] raise Exception( "Accumulator {} already exists for parameter {}".format( @@ -723,7 +711,7 @@ class Optimizer: persistable=True, dtype=dtype or param.dtype, type=core.VarDesc.VarType.LOD_TENSOR - if framework._non_static_mode() + if in_dygraph_mode() else (param.type if type is None else type), shape=shape, belong_to_optimizer=True, @@ -735,7 +723,7 @@ class Optimizer: var, initializer=Constant(value=float(fill_value)) ) - if framework._non_static_mode(): + if in_dygraph_mode(): if 
len(self._accumulators_holder) > 0: assert ( var_name in self._accumulators_holder @@ -770,7 +758,7 @@ class Optimizer: if self._name is not None: name = self._name + "_" + name if name in self._global_accumulators: - if framework._non_static_mode(): + if in_dygraph_mode(): return self._global_accumulators[name] raise Exception("Global accumulator {} already exists".format(name)) if shape is None: @@ -796,7 +784,7 @@ class Optimizer: var, initializer=Constant(value=float(fill_value)) ) - if framework._non_static_mode(): + if in_dygraph_mode(): if len(self._accumulators_holder) > 0: assert ( var_name in self._accumulators_holder @@ -911,7 +899,7 @@ class Optimizer: ) self._create_global_learning_rate() - if framework._non_static_mode(): + if in_dygraph_mode(): for param_and_grad in parameters_and_grads: if param_and_grad[1] is None: continue @@ -1018,7 +1006,7 @@ class Optimizer: See examples in ``apply_gradients``. """ act_no_grad_set = None - if framework._non_static_mode(): + if in_dygraph_mode(): pass else: act_no_grad_set = self._get_no_grad_set(loss, no_grad_set) @@ -1027,7 +1015,7 @@ class Optimizer: if self._dtype is None: self._dtype = loss.dtype - if framework._non_static_mode(): + if in_dygraph_mode(): parameter_list = ( parameter_list if parameter_list else self._parameter_list ) @@ -1084,7 +1072,7 @@ class Optimizer: assert regularization_term is not None - if framework._non_static_mode(): + if in_dygraph_mode(): return _legacy_C_ops.sum([grad, regularization_term]) new_grad = grad @@ -1131,7 +1119,7 @@ class Optimizer: Exception: Unknown regularization type """ params_and_grads = [] - if framework._non_static_mode(): + if in_dygraph_mode(): for param, grad in parameters_and_grads: new_grad = self._create_regularization_of_grad( param, grad, regularization @@ -1302,7 +1290,7 @@ class Optimizer: Returns: list: A list of operators appended to the current program. 
""" - if framework._non_static_mode(): + if in_dygraph_mode(): with program_guard( framework.default_main_program(), framework.default_startup_program(), @@ -1562,42 +1550,32 @@ class SGDOptimizer(Optimizer): find_master, ) return None - if _in_legacy_dygraph(): - _legacy_C_ops.sgd( - param_and_grad[0], - lr, - param_and_grad[1], - master_weight, - param_and_grad[0], - master_weight, - ) - return None - - assert isinstance(block, framework.Block) - # create the optimize op - inputs = { - "Param": param_and_grad[0], - "Grad": param_and_grad[1], - "LearningRate": lr, - } + else: + assert isinstance(block, framework.Block) + # create the optimize op + inputs = { + "Param": param_and_grad[0], + "Grad": param_and_grad[1], + "LearningRate": lr, + } - outputs = {"ParamOut": param_and_grad[0]} + outputs = {"ParamOut": param_and_grad[0]} - attrs = {"multi_precision": find_master} + attrs = {"multi_precision": find_master} - if find_master: - inputs["MasterParam"] = master_weight - outputs["MasterParamOut"] = master_weight + if find_master: + inputs["MasterParam"] = master_weight + outputs["MasterParamOut"] = master_weight - sgd_op = block.append_op( - type=self.type, - inputs=inputs, - outputs=outputs, - attrs=attrs, - stop_gradient=True, - ) + sgd_op = block.append_op( + type=self.type, + inputs=inputs, + outputs=outputs, + attrs=attrs, + stop_gradient=True, + ) - return sgd_op + return sgd_op class MomentumOptimizer(Optimizer): @@ -1710,7 +1688,7 @@ class MomentumOptimizer(Optimizer): ) lr = self._create_param_lr(param_and_grad) master_weight = None - if framework._non_static_mode(): + if in_dygraph_mode(): _, _, _ = _legacy_C_ops.momentum( param_and_grad[0], param_and_grad[1], @@ -1726,29 +1704,29 @@ class MomentumOptimizer(Optimizer): self._use_nesterov, ) return None + else: + attrs = {"mu": self._momentum, "use_nesterov": self._use_nesterov} + inputs = { + "Param": [param_and_grad[0]], + "Grad": [param_and_grad[1]], + "Velocity": [velocity_acc], + "LearningRate": [lr], + } - attrs = {"mu": self._momentum, "use_nesterov": self._use_nesterov} - inputs = { - "Param": [param_and_grad[0]], - "Grad": [param_and_grad[1]], - "Velocity": [velocity_acc], - "LearningRate": [lr], - } - - outputs = { - "ParamOut": [param_and_grad[0]], - "VelocityOut": [velocity_acc], - } - # create the momentum optimize op - momentum_op = block.append_op( - type=self.type, - inputs=inputs, - outputs=outputs, - attrs=attrs, - stop_gradient=True, - ) + outputs = { + "ParamOut": [param_and_grad[0]], + "VelocityOut": [velocity_acc], + } + # create the momentum optimize op + momentum_op = block.append_op( + type=self.type, + inputs=inputs, + outputs=outputs, + attrs=attrs, + stop_gradient=True, + ) - return momentum_op + return momentum_op class LarsMomentumOptimizer(Optimizer): @@ -1974,7 +1952,7 @@ class LarsMomentumOptimizer(Optimizer): inputs["MasterParam"] = master_weight outputs["MasterParamOut"] = master_weight - if framework._non_static_mode(): + if in_dygraph_mode(): tmp, tmp2 = _legacy_C_ops.lars_momentum( [param_and_grad[0]], [param_and_grad[1]], @@ -2123,18 +2101,6 @@ class AdagradOptimizer(Optimizer): self._epsilon, ) return None - elif _in_legacy_dygraph(): - _legacy_C_ops.adagrad( - param_and_grad[0], - param_and_grad[1], - moment_acc, - self._create_param_lr(param_and_grad), - param_and_grad[0], - moment_acc, - "epsilon", - self._epsilon, - ) - return None else: # Create the adagrad optimizer op adagrad_op = block.append_op( @@ -2430,7 +2396,7 @@ class AdamOptimizer(Optimizer): lr = 
self._create_param_lr(param_and_grad) # create the adam optimize op - if framework._non_static_mode(): + if in_dygraph_mode(): _beta1 = ( self._beta1 if not isinstance(self._beta1, Variable) @@ -2721,7 +2687,7 @@ class AdamaxOptimizer(Optimizer): self._beta1_pow_acc_str, param_and_grad[0] ) - if framework.in_dygraph_mode(): + if in_dygraph_mode(): _C_ops.adamax_( param_and_grad[0], param_and_grad[1], @@ -2733,24 +2699,6 @@ class AdamaxOptimizer(Optimizer): self._beta2, self._epsilon, ) - elif framework._in_legacy_dygraph(): - _legacy_C_ops.adamax( - param_and_grad[0], - param_and_grad[1], - self._create_param_lr(param_and_grad), - moment, - inf_norm, - beta1_pow_acc, - param_and_grad[0], - moment, - inf_norm, - "beta1", - self._beta1, - "beta2", - self._beta2, - "epsilon", - self._epsilon, - ) else: # create the adamax optimize op adamax_op = block.append_op( @@ -2790,15 +2738,8 @@ class AdamaxOptimizer(Optimizer): beta1_pow_acc = self._get_accumulator( self._beta1_pow_acc_str, param ) - if framework._non_static_mode(): - if framework.in_dygraph_mode(): - tmp = _C_ops.scale( - beta1_pow_acc, self._beta1, 0.0, True - ) - else: - tmp = _legacy_C_ops.scale( - beta1_pow_acc, "scale", self._beta1 - ) + if in_dygraph_mode(): + tmp = _C_ops.scale(beta1_pow_acc, self._beta1, 0.0, True) beta1_pow_acc.copy_(tmp, False) else: block.append_op( @@ -2891,7 +2832,7 @@ class DpsgdOptimizer(Optimizer): if self._seed is None: self._seed = 0 - if framework._non_static_mode(): + if in_dygraph_mode(): _legacy_C_ops.dpsgd( param_and_grad[0], param_and_grad[1], @@ -3023,7 +2964,7 @@ class DecayedAdagradOptimizer(Optimizer): self._moment_acc_str, param_and_grad[0] ) - if framework._non_static_mode(): + if in_dygraph_mode(): _legacy_C_ops.decayed_adagrad( param_and_grad[0], param_and_grad[1], @@ -3160,7 +3101,7 @@ class AdadeltaOptimizer(Optimizer): self._avg_squared_update_acc_str, param_and_grad[0] ) - if framework.in_dygraph_mode(): + if in_dygraph_mode(): _C_ops.adadelta_( param_and_grad[0], param_and_grad[1], @@ -3169,20 +3110,6 @@ class AdadeltaOptimizer(Optimizer): self._rho, self._epsilon, ) - elif framework._in_legacy_dygraph(): - _legacy_C_ops.adadelta( - param_and_grad[0], - param_and_grad[1], - avg_squared_grad_acc, - avg_squared_update_acc, - param_and_grad[0], - avg_squared_grad_acc, - avg_squared_update_acc, - "epsilon", - self._epsilon, - "rho", - self._rho, - ) else: # Create the adadelta optimizer op adadelta_op = block.append_op( @@ -3387,27 +3314,6 @@ class RMSPropOptimizer(Optimizer): self._centered, ) return None - elif _in_legacy_dygraph(): - _legacy_C_ops.rmsprop( - param_and_grad[0], - mean_square_acc, - self._create_param_lr(param_and_grad), - param_and_grad[1], - momentum_acc, - param_and_grad[0], - momentum_acc, - mean_square_acc, - mean_grad_acc, - "epsilon", - self._epsilon, - "decay", - self._rho, - "momentum", - self._momentum, - "centered", - self._centered, - ) - return None else: rmsprop_op = block.append_op( type=self.type, @@ -3581,7 +3487,7 @@ class FtrlOptimizer(Optimizer): linear_acc = self._get_accumulator( self._linear_acc_str, param_and_grad[0] ) - if framework._non_static_mode(): + if in_dygraph_mode(): _legacy_C_ops.ftrl( param_and_grad[0], squared_acc, @@ -3763,7 +3669,7 @@ class LambOptimizer(AdamOptimizer): weight_decay = self._weight_decay lr = self._create_param_lr(param_and_grad) master_weight = None - if framework._non_static_mode(): + if in_dygraph_mode(): _legacy_C_ops.lamb( param_and_grad[0], param_and_grad[1], @@ -3940,7 +3846,7 @@ class 
ModelAverage(Optimizer): regularization=None, name=None, ): - if framework._non_static_mode(): + if in_dygraph_mode(): raise Exception("In dygraph, don't support ModelAverage.") super().__init__(0.0, regularization=regularization, name=name) self.average_window = average_window_rate @@ -4269,7 +4175,7 @@ class ExponentialMovingAverage: """ def __init__(self, decay=0.999, thres_steps=None, name=None): - if framework._non_static_mode(): + if in_dygraph_mode(): raise Exception( "In dygraph, don't support ExponentialMovingAverage." ) @@ -4494,7 +4400,7 @@ class PipelineOptimizer: self._device = "npu" elif core.is_compiled_with_cuda(): self._device = "gpu" - if framework._non_static_mode(): + if in_dygraph_mode(): raise Exception("In dygraph, don't support PipelineOptimizer.") valid_optimizers = ( Optimizer, @@ -6451,7 +6357,7 @@ class RecomputeOptimizer(Optimizer): """ def __init__(self, optimizer): - if framework._non_static_mode(): + if in_dygraph_mode(): raise Exception("In dygraph, don't support RecomputeOptimizer.") self._optimizer = optimizer self._checkpoints = None @@ -7066,7 +6972,7 @@ class RecomputeOptimizer(Optimizer): self._checkpoints is not None ), "You should call _set_checkpoints first" - if framework._non_static_mode(): + if in_dygraph_mode(): raise NotImplementedError( "DyGraph current does not support recompute" ) @@ -7164,7 +7070,7 @@ class RecomputeOptimizer(Optimizer): assert ( self._checkpoints is not None ), "You should call _set_checkpoints first" - if framework._non_static_mode(): + if in_dygraph_mode(): raise NotImplementedError( "DyGraph current does not support recompute" ) @@ -7248,7 +7154,7 @@ class LookaheadOptimizer: def __init__(self, inner_optimizer, alpha=0.5, k=5): - if framework._non_static_mode(): + if in_dygraph_mode(): raise Exception("In dygraph, don't support LookaheadOptimizer.") assert inner_optimizer is not None, "inner optimizer can not be None" assert ( @@ -7427,7 +7333,7 @@ class GradientMergeOptimizer: GRAD_MERGE_COND_NAME = "grad_merge_cond_name" def __init__(self, inner_optimizer, k_steps=1, avg=True): - if framework._non_static_mode(): + if in_dygraph_mode(): raise Exception( "In dygraph, we don't support GradientMergeOptimizer." "You can do Gradient merge by yourself with k-times forward + backward, " diff --git a/python/paddle/fluid/tests/custom_op/test_custom_relu_op_xpu_setup.py b/python/paddle/fluid/tests/custom_op/test_custom_relu_op_xpu_setup.py index 69a30582477..655bdeca022 100644 --- a/python/paddle/fluid/tests/custom_op/test_custom_relu_op_xpu_setup.py +++ b/python/paddle/fluid/tests/custom_op/test_custom_relu_op_xpu_setup.py @@ -347,6 +347,7 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase): ) def test_with_dataloader(self): + paddle.disable_static() for device in self.devices: paddle.set_device(device) # data loader @@ -377,6 +378,7 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase): if batch_id == 5: break + paddle.enable_static() if __name__ == '__main__': -- GitLab
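
For reference, a minimal standalone sketch of the single-path dispatch this patch converges on, modeled directly on the rewritten _squared_l2_norm in clip.py above: one in_dygraph_mode() check selects the eager _C_ops call, and everything else falls through to the static-graph LayerHelper path, with the _in_legacy_dygraph()/_legacy_C_ops branch removed entirely. The wrapper name squared_l2_norm_example and the __main__ usage are illustrative only, and the imports assume a Paddle 2.4-era tree where these fluid internals are still importable.

    import paddle
    from paddle import _C_ops
    from paddle.fluid.data_feeder import check_variable_and_dtype
    from paddle.fluid.framework import in_dygraph_mode
    from paddle.fluid.layer_helper import LayerHelper


    def squared_l2_norm_example(x):
        if in_dygraph_mode():
            # Eager (dygraph) mode: call the C++ kernel directly.
            return _C_ops.squared_l2_norm(x)
        else:
            # Static-graph mode: validate the input and append the op to the
            # current program, as the rewritten _squared_l2_norm does.
            op_type = 'squared_l2_norm'
            check_variable_and_dtype(x, 'x', ['float32', 'float64'], op_type)
            helper = LayerHelper(op_type, **locals())
            out = helper.create_variable_for_type_inference(x.dtype)
            helper.append_op(type=op_type, inputs={"X": x}, outputs={'Out': out})
            return out


    if __name__ == '__main__':
        paddle.disable_static()
        x = paddle.rand([3, 4])
        # Prints the sum of squares of all elements of x.
        print(squared_l2_norm_example(x))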