未验证 提交 f1072973 编写于 作者: 姜永久 提交者: GitHub

rm legacy fluid part4 (#49281)

* rm legacy fluid part4

* rm non_static_mode

* minor change

* modify initializer

* rm legacy for initializer

* fix dataloader test
上级 76f43f6d
......@@ -23,7 +23,7 @@ from . import core
from . import name_scope
from .dygraph import base as imperative_base
from .data_feeder import check_variable_and_dtype
from .framework import _non_static_mode, in_dygraph_mode, _in_legacy_dygraph
from .framework import in_dygraph_mode
from .layer_helper import LayerHelper
from .framework import default_main_program
from paddle import _C_ops, _legacy_C_ops
......@@ -78,9 +78,7 @@ def _squared_l2_norm(x):
if in_dygraph_mode():
return _C_ops.squared_l2_norm(x)
elif _in_legacy_dygraph():
return _legacy_C_ops.squared_l2_norm(x)
else:
op_type = 'squared_l2_norm'
check_variable_and_dtype(x, 'x', ['float32', 'float64'], op_type)
helper = LayerHelper(op_type, **locals())
......@@ -196,7 +194,7 @@ class ClipGradBase:
raise NotImplementedError
def __call__(self, params_grads):
if framework._non_static_mode():
if in_dygraph_mode():
return self._dygraph_clip(params_grads)
else:
for p, g in params_grads:
......
......@@ -34,7 +34,7 @@ import paddle
import paddle.profiler as profiler
from paddle.profiler.utils import in_profiler_mode
from .. import core, layers
from ..framework import _non_static_mode, in_dygraph_mode, _in_legacy_dygraph
from ..framework import in_dygraph_mode
from ..multiprocess_utils import (
_set_SIGCHLD_handler,
MP_STATUS_CHECK_INTERVAL,
......@@ -303,10 +303,7 @@ class _DataLoaderIterSingleProcess(_DataLoaderIterBase):
)
data = _restore_batch(data, self._structure_infos.pop(0))
else:
if _in_legacy_dygraph():
data = self._reader.read_next_var_list()
data = _restore_batch(data, self._structure_infos.pop(0))
else: # in static mode
# in static mode
if self._return_list:
data = self._reader.read_next_list()
for i in range(len(data)):
......@@ -315,9 +312,7 @@ class _DataLoaderIterSingleProcess(_DataLoaderIterBase):
self._structure_infos.pop(0)
for _ in range(len(self._places))
]
data = [
_restore_batch(d, s) for d, s in zip(data, structs)
]
data = [_restore_batch(d, s) for d, s in zip(data, structs)]
# static graph organized data on multi-device with list, if
# place number is 1, there is only 1 device, extract the data
# from list for devices to be compatible with dygraph mode
......@@ -528,9 +523,7 @@ class _DataLoaderIterMultiProcess(_DataLoaderIterBase):
self._reader.read_next_list()[0]
)
else:
if _in_legacy_dygraph():
self._reader.read_next_var_list()
elif self._return_list:
if self._return_list:
self._reader.read_next_list()
else:
data = self._reader.read_next()
......@@ -815,10 +808,6 @@ class _DataLoaderIterMultiProcess(_DataLoaderIterBase):
self._reader.read_next_list()[0]
)
data = _restore_batch(data, self._structure_infos.pop(0))
else:
if _in_legacy_dygraph():
data = self._reader.read_next_var_list()
data = _restore_batch(data, self._structure_infos.pop(0))
else:
if self._return_list:
data = self._reader.read_next_list()
......@@ -828,9 +817,7 @@ class _DataLoaderIterMultiProcess(_DataLoaderIterBase):
self._structure_infos.pop(0)
for _ in range(len(self._places))
]
data = [
_restore_batch(d, s) for d, s in zip(data, structs)
]
data = [_restore_batch(d, s) for d, s in zip(data, structs)]
# static graph organized data on multi-device with list, if
# place number is 1, there is only 1 device, extract the data
# from list for devices to be compatible with dygraph mode
......
......@@ -17,9 +17,7 @@ import functools
from . import framework
from . import core
from .framework import (
_non_static_mode,
in_dygraph_mode,
_in_legacy_dygraph,
default_main_program,
_current_expected_place,
)
......@@ -191,21 +189,6 @@ class ConstantInitializer(Initializer):
var, var.shape, str(float(self._value)), var.dtype, place
)
return None
elif _in_legacy_dygraph():
_legacy_C_ops.fill_constant(
var,
'value',
float(self._value),
'force_cpu',
self._force_cpu,
'dtype',
int(var.dtype),
'str_value',
str(float(self._value)),
'shape',
var.shape,
)
return None
else:
op = block.append_op(
type="fill_constant",
......@@ -307,7 +290,6 @@ class UniformInitializer(Initializer):
out_dtype = var.dtype
out_var = var
if framework._non_static_mode():
if in_dygraph_mode():
out_var = _C_ops.uniform(
var.shape,
......@@ -317,36 +299,8 @@ class UniformInitializer(Initializer):
self._seed,
_current_expected_place(),
)
elif _in_legacy_dygraph():
out_var = _legacy_C_ops.uniform_random(
'shape',
var.shape,
'min',
self._low,
'max',
self._high,
'seed',
self._seed,
'dtype',
out_dtype,
'diag_num',
self._diag_num,
'diag_step',
self._diag_step,
'diag_val',
self._diag_val,
)
if var.dtype == VarDesc.VarType.FP16:
if in_dygraph_mode():
var_tmp = _C_ops.cast(out_var, var.dtype)
elif _in_legacy_dygraph():
var_tmp = _legacy_C_ops.cast(
out_var,
'in_dtype',
out_var.dtype,
'out_dtype',
var.dtype,
)
var_tmp._share_underline_tensor_to(var)
else:
out_var._share_underline_tensor_to(var)
......@@ -446,24 +400,6 @@ class NormalInitializer(Initializer):
out_var._share_underline_tensor_to(var)
return None
if _in_legacy_dygraph():
out_var = _legacy_C_ops.gaussian_random(
'shape',
var.shape,
'dtype',
var.dtype,
'mean',
self._mean,
'std',
self._std_dev,
'seed',
self._seed,
'use_mkldnn',
False,
)
out_var._share_underline_tensor_to(var)
return None
else:
op = block.append_op(
type="gaussian_random",
......@@ -559,27 +495,6 @@ class TruncatedNormalInitializer(Initializer):
out_var._share_underline_tensor_to(var)
return None
if _in_legacy_dygraph():
out_var = _legacy_C_ops.truncated_gaussian_random(
'shape',
var.shape,
'dtype',
out_dtype,
'mean',
self._mean,
'std',
self._std_dev,
'seed',
self._seed,
)
if var.dtype in [VarDesc.VarType.FP16, VarDesc.VarType.BF16]:
var_tmp = _legacy_C_ops.cast(
out_var, 'in_dtype', out_var.dtype, 'out_dtype', var.dtype
)
var_tmp._share_underline_tensor_to(var)
else:
out_var._share_underline_tensor_to(var)
return None
else:
op = block.append_op(
type="truncated_gaussian_random",
......@@ -707,10 +622,9 @@ class XavierInitializer(Initializer):
out_dtype = var.dtype
out_var = var
if framework._non_static_mode():
if in_dygraph_mode():
if self._uniform:
limit = math.sqrt(6.0 / float(fan_in + fan_out))
if in_dygraph_mode():
out_var = _C_ops.uniform(
out_var.shape,
out_dtype,
......@@ -719,54 +633,18 @@ class XavierInitializer(Initializer):
self._seed,
_current_expected_place(),
)
elif _in_legacy_dygraph():
out_var = _legacy_C_ops.uniform_random(
'shape',
out_var.shape,
'min',
-limit,
'max',
limit,
'seed',
self._seed,
'dtype',
out_dtype,
)
else:
std = math.sqrt(2.0 / float(fan_in + fan_out))
if in_dygraph_mode():
place = _current_expected_place()
out_var = _C_ops.gaussian(
out_var.shape, 0.0, std, self._seed, out_dtype, place
)
else:
out_var = _legacy_C_ops.gaussian_random(
'shape',
out_var.shape,
'dtype',
out_dtype,
'mean',
0.0,
'std',
std,
'seed',
self._seed,
)
if var.dtype == VarDesc.VarType.FP16 or (
var.dtype == VarDesc.VarType.BF16 and not self._uniform
):
if in_dygraph_mode():
var_tmp = _C_ops.cast(out_var, var.dtype)
elif _in_legacy_dygraph():
var_tmp = _legacy_C_ops.cast(
out_var,
'in_dtype',
out_var.dtype,
'out_dtype',
var.dtype,
)
var_tmp._share_underline_tensor_to(var)
else:
out_var._share_underline_tensor_to(var)
......@@ -918,11 +796,10 @@ class MSRAInitializer(Initializer):
out_dtype = var.dtype
out_var = var
if framework._non_static_mode():
if in_dygraph_mode():
if self._uniform:
gain = calculate_gain(self._nonlinearity, self._negative_slope)
limit = gain * math.sqrt(3.0 / float(fan_in))
if in_dygraph_mode():
out_var = _C_ops.uniform(
var.shape,
out_dtype,
......@@ -931,54 +808,18 @@ class MSRAInitializer(Initializer):
self._seed,
_current_expected_place(),
)
else:
out_var = _legacy_C_ops.uniform_random(
'shape',
out_var.shape,
'min',
-limit,
'max',
limit,
'seed',
self._seed,
'dtype',
int(out_dtype),
)
else:
gain = calculate_gain(self._nonlinearity, self._negative_slope)
std = gain / math.sqrt(float(fan_in))
if in_dygraph_mode():
place = _current_expected_place()
out_var = _C_ops.gaussian(
out_var.shape, 0.0, std, self._seed, out_dtype, place
)
else:
out_var = _legacy_C_ops.gaussian_random(
'shape',
out_var.shape,
'dtype',
int(out_dtype),
'mean',
0.0,
'std',
std,
'seed',
self._seed,
)
if var.dtype == VarDesc.VarType.FP16 or (
var.dtype == VarDesc.VarType.BF16 and not self._uniform
):
if in_dygraph_mode():
var_tmp = _C_ops.cast(out_var, var.dtype)
elif _in_legacy_dygraph():
var_tmp = _legacy_C_ops.cast(
out_var,
'in_dtype',
out_var.dtype,
'out_dtype',
var.dtype,
)
var_tmp._share_underline_tensor_to(var)
else:
out_var._share_underline_tensor_to(var)
......@@ -1145,7 +986,6 @@ class BilinearInitializer(Initializer):
if np.prod(shape) > 1024 * 1024:
raise ValueError("The size of input is too big. ")
if framework._non_static_mode():
if in_dygraph_mode():
_C_ops.assign_value_(
out_var,
......@@ -1154,31 +994,12 @@ class BilinearInitializer(Initializer):
values,
_current_expected_place(),
)
elif _in_legacy_dygraph():
_legacy_C_ops.assign_value(
out_var,
'shape',
list(shape),
'dtype',
out_dtype,
value_name,
values,
)
if var.dtype in [
VarDesc.VarType.FP16,
VarDesc.VarType.BF16,
VarDesc.VarType.FP64,
]:
if in_dygraph_mode():
var_tmp = _C_ops.cast(out_var, var.dtype)
elif _in_legacy_dygraph():
var_tmp = _legacy_C_ops.cast(
out_var,
'in_dtype',
out_var.dtype,
'out_dtype',
var.dtype,
)
var_tmp._share_underline_tensor_to(var)
else:
out_var._share_underline_tensor_to(var)
......@@ -1285,7 +1106,6 @@ class NumpyArrayInitializer(Initializer):
"saving it to file and 'load_op' to load it"
)
if framework._non_static_mode():
if in_dygraph_mode():
_C_ops.assign_value_(
out_var,
......@@ -1294,27 +1114,8 @@ class NumpyArrayInitializer(Initializer):
values,
_current_expected_place(),
)
elif _in_legacy_dygraph():
_legacy_C_ops.assign_value(
out_var,
'shape',
list(self._value.shape),
'dtype',
out_dtype,
value_name,
values,
)
if var.dtype in [VarDesc.VarType.FP16, VarDesc.VarType.BF16]:
if in_dygraph_mode():
var_tmp = _C_ops.cast(out_var, var.dtype)
elif _in_legacy_dygraph():
var_tmp = _legacy_C_ops.cast(
out_var,
'in_dtype',
out_var.dtype,
'out_dtype',
var.dtype,
)
var_tmp._share_underline_tensor_to(var)
else:
out_var._share_underline_tensor_to(var)
......
......@@ -62,7 +62,6 @@ from .wrapped_decorator import signature_safe_contextmanager
import warnings
from paddle import _C_ops, _legacy_C_ops
from ..fluid.framework import (
_in_legacy_dygraph,
in_dygraph_mode,
_current_expected_place,
)
......@@ -130,7 +129,7 @@ class Optimizer:
list(parameter_list) if parameter_list is not None else None
)
self._name = name
if framework._non_static_mode():
if in_dygraph_mode():
if not isinstance(
learning_rate, (float, LearningRateDecay, LRScheduler)
):
......@@ -532,17 +531,6 @@ class Optimizer:
current_lr.dtype,
place,
)
elif _in_legacy_dygraph():
_legacy_C_ops.fill_constant(
current_lr,
'value',
float(value),
'dtype',
current_lr.dtype,
'shape',
list(current_lr.shape),
)
else:
global_block = (
framework.default_main_program().global_block()
......@@ -703,7 +691,7 @@ class Optimizer:
name in self._accumulators
and param.name in self._accumulators[name]
):
if framework._non_static_mode():
if in_dygraph_mode():
return self._accumulators[name][param.name]
raise Exception(
"Accumulator {} already exists for parameter {}".format(
......@@ -723,7 +711,7 @@ class Optimizer:
persistable=True,
dtype=dtype or param.dtype,
type=core.VarDesc.VarType.LOD_TENSOR
if framework._non_static_mode()
if in_dygraph_mode()
else (param.type if type is None else type),
shape=shape,
belong_to_optimizer=True,
......@@ -735,7 +723,7 @@ class Optimizer:
var, initializer=Constant(value=float(fill_value))
)
if framework._non_static_mode():
if in_dygraph_mode():
if len(self._accumulators_holder) > 0:
assert (
var_name in self._accumulators_holder
......@@ -770,7 +758,7 @@ class Optimizer:
if self._name is not None:
name = self._name + "_" + name
if name in self._global_accumulators:
if framework._non_static_mode():
if in_dygraph_mode():
return self._global_accumulators[name]
raise Exception("Global accumulator {} already exists".format(name))
if shape is None:
......@@ -796,7 +784,7 @@ class Optimizer:
var, initializer=Constant(value=float(fill_value))
)
if framework._non_static_mode():
if in_dygraph_mode():
if len(self._accumulators_holder) > 0:
assert (
var_name in self._accumulators_holder
......@@ -911,7 +899,7 @@ class Optimizer:
)
self._create_global_learning_rate()
if framework._non_static_mode():
if in_dygraph_mode():
for param_and_grad in parameters_and_grads:
if param_and_grad[1] is None:
continue
......@@ -1018,7 +1006,7 @@ class Optimizer:
See examples in ``apply_gradients``.
"""
act_no_grad_set = None
if framework._non_static_mode():
if in_dygraph_mode():
pass
else:
act_no_grad_set = self._get_no_grad_set(loss, no_grad_set)
......@@ -1027,7 +1015,7 @@ class Optimizer:
if self._dtype is None:
self._dtype = loss.dtype
if framework._non_static_mode():
if in_dygraph_mode():
parameter_list = (
parameter_list if parameter_list else self._parameter_list
)
......@@ -1084,7 +1072,7 @@ class Optimizer:
assert regularization_term is not None
if framework._non_static_mode():
if in_dygraph_mode():
return _legacy_C_ops.sum([grad, regularization_term])
new_grad = grad
......@@ -1131,7 +1119,7 @@ class Optimizer:
Exception: Unknown regularization type
"""
params_and_grads = []
if framework._non_static_mode():
if in_dygraph_mode():
for param, grad in parameters_and_grads:
new_grad = self._create_regularization_of_grad(
param, grad, regularization
......@@ -1302,7 +1290,7 @@ class Optimizer:
Returns:
list: A list of operators appended to the current program.
"""
if framework._non_static_mode():
if in_dygraph_mode():
with program_guard(
framework.default_main_program(),
framework.default_startup_program(),
......@@ -1562,17 +1550,7 @@ class SGDOptimizer(Optimizer):
find_master,
)
return None
if _in_legacy_dygraph():
_legacy_C_ops.sgd(
param_and_grad[0],
lr,
param_and_grad[1],
master_weight,
param_and_grad[0],
master_weight,
)
return None
else:
assert isinstance(block, framework.Block)
# create the optimize op
inputs = {
......@@ -1710,7 +1688,7 @@ class MomentumOptimizer(Optimizer):
)
lr = self._create_param_lr(param_and_grad)
master_weight = None
if framework._non_static_mode():
if in_dygraph_mode():
_, _, _ = _legacy_C_ops.momentum(
param_and_grad[0],
param_and_grad[1],
......@@ -1726,7 +1704,7 @@ class MomentumOptimizer(Optimizer):
self._use_nesterov,
)
return None
else:
attrs = {"mu": self._momentum, "use_nesterov": self._use_nesterov}
inputs = {
"Param": [param_and_grad[0]],
......@@ -1974,7 +1952,7 @@ class LarsMomentumOptimizer(Optimizer):
inputs["MasterParam"] = master_weight
outputs["MasterParamOut"] = master_weight
if framework._non_static_mode():
if in_dygraph_mode():
tmp, tmp2 = _legacy_C_ops.lars_momentum(
[param_and_grad[0]],
[param_and_grad[1]],
......@@ -2123,18 +2101,6 @@ class AdagradOptimizer(Optimizer):
self._epsilon,
)
return None
elif _in_legacy_dygraph():
_legacy_C_ops.adagrad(
param_and_grad[0],
param_and_grad[1],
moment_acc,
self._create_param_lr(param_and_grad),
param_and_grad[0],
moment_acc,
"epsilon",
self._epsilon,
)
return None
else:
# Create the adagrad optimizer op
adagrad_op = block.append_op(
......@@ -2430,7 +2396,7 @@ class AdamOptimizer(Optimizer):
lr = self._create_param_lr(param_and_grad)
# create the adam optimize op
if framework._non_static_mode():
if in_dygraph_mode():
_beta1 = (
self._beta1
if not isinstance(self._beta1, Variable)
......@@ -2721,7 +2687,7 @@ class AdamaxOptimizer(Optimizer):
self._beta1_pow_acc_str, param_and_grad[0]
)
if framework.in_dygraph_mode():
if in_dygraph_mode():
_C_ops.adamax_(
param_and_grad[0],
param_and_grad[1],
......@@ -2733,24 +2699,6 @@ class AdamaxOptimizer(Optimizer):
self._beta2,
self._epsilon,
)
elif framework._in_legacy_dygraph():
_legacy_C_ops.adamax(
param_and_grad[0],
param_and_grad[1],
self._create_param_lr(param_and_grad),
moment,
inf_norm,
beta1_pow_acc,
param_and_grad[0],
moment,
inf_norm,
"beta1",
self._beta1,
"beta2",
self._beta2,
"epsilon",
self._epsilon,
)
else:
# create the adamax optimize op
adamax_op = block.append_op(
......@@ -2790,15 +2738,8 @@ class AdamaxOptimizer(Optimizer):
beta1_pow_acc = self._get_accumulator(
self._beta1_pow_acc_str, param
)
if framework._non_static_mode():
if framework.in_dygraph_mode():
tmp = _C_ops.scale(
beta1_pow_acc, self._beta1, 0.0, True
)
else:
tmp = _legacy_C_ops.scale(
beta1_pow_acc, "scale", self._beta1
)
if in_dygraph_mode():
tmp = _C_ops.scale(beta1_pow_acc, self._beta1, 0.0, True)
beta1_pow_acc.copy_(tmp, False)
else:
block.append_op(
......@@ -2891,7 +2832,7 @@ class DpsgdOptimizer(Optimizer):
if self._seed is None:
self._seed = 0
if framework._non_static_mode():
if in_dygraph_mode():
_legacy_C_ops.dpsgd(
param_and_grad[0],
param_and_grad[1],
......@@ -3023,7 +2964,7 @@ class DecayedAdagradOptimizer(Optimizer):
self._moment_acc_str, param_and_grad[0]
)
if framework._non_static_mode():
if in_dygraph_mode():
_legacy_C_ops.decayed_adagrad(
param_and_grad[0],
param_and_grad[1],
......@@ -3160,7 +3101,7 @@ class AdadeltaOptimizer(Optimizer):
self._avg_squared_update_acc_str, param_and_grad[0]
)
if framework.in_dygraph_mode():
if in_dygraph_mode():
_C_ops.adadelta_(
param_and_grad[0],
param_and_grad[1],
......@@ -3169,20 +3110,6 @@ class AdadeltaOptimizer(Optimizer):
self._rho,
self._epsilon,
)
elif framework._in_legacy_dygraph():
_legacy_C_ops.adadelta(
param_and_grad[0],
param_and_grad[1],
avg_squared_grad_acc,
avg_squared_update_acc,
param_and_grad[0],
avg_squared_grad_acc,
avg_squared_update_acc,
"epsilon",
self._epsilon,
"rho",
self._rho,
)
else:
# Create the adadelta optimizer op
adadelta_op = block.append_op(
......@@ -3387,27 +3314,6 @@ class RMSPropOptimizer(Optimizer):
self._centered,
)
return None
elif _in_legacy_dygraph():
_legacy_C_ops.rmsprop(
param_and_grad[0],
mean_square_acc,
self._create_param_lr(param_and_grad),
param_and_grad[1],
momentum_acc,
param_and_grad[0],
momentum_acc,
mean_square_acc,
mean_grad_acc,
"epsilon",
self._epsilon,
"decay",
self._rho,
"momentum",
self._momentum,
"centered",
self._centered,
)
return None
else:
rmsprop_op = block.append_op(
type=self.type,
......@@ -3581,7 +3487,7 @@ class FtrlOptimizer(Optimizer):
linear_acc = self._get_accumulator(
self._linear_acc_str, param_and_grad[0]
)
if framework._non_static_mode():
if in_dygraph_mode():
_legacy_C_ops.ftrl(
param_and_grad[0],
squared_acc,
......@@ -3763,7 +3669,7 @@ class LambOptimizer(AdamOptimizer):
weight_decay = self._weight_decay
lr = self._create_param_lr(param_and_grad)
master_weight = None
if framework._non_static_mode():
if in_dygraph_mode():
_legacy_C_ops.lamb(
param_and_grad[0],
param_and_grad[1],
......@@ -3940,7 +3846,7 @@ class ModelAverage(Optimizer):
regularization=None,
name=None,
):
if framework._non_static_mode():
if in_dygraph_mode():
raise Exception("In dygraph, don't support ModelAverage.")
super().__init__(0.0, regularization=regularization, name=name)
self.average_window = average_window_rate
......@@ -4269,7 +4175,7 @@ class ExponentialMovingAverage:
"""
def __init__(self, decay=0.999, thres_steps=None, name=None):
if framework._non_static_mode():
if in_dygraph_mode():
raise Exception(
"In dygraph, don't support ExponentialMovingAverage."
)
......@@ -4494,7 +4400,7 @@ class PipelineOptimizer:
self._device = "npu"
elif core.is_compiled_with_cuda():
self._device = "gpu"
if framework._non_static_mode():
if in_dygraph_mode():
raise Exception("In dygraph, don't support PipelineOptimizer.")
valid_optimizers = (
Optimizer,
......@@ -6451,7 +6357,7 @@ class RecomputeOptimizer(Optimizer):
"""
def __init__(self, optimizer):
if framework._non_static_mode():
if in_dygraph_mode():
raise Exception("In dygraph, don't support RecomputeOptimizer.")
self._optimizer = optimizer
self._checkpoints = None
......@@ -7066,7 +6972,7 @@ class RecomputeOptimizer(Optimizer):
self._checkpoints is not None
), "You should call _set_checkpoints first"
if framework._non_static_mode():
if in_dygraph_mode():
raise NotImplementedError(
"DyGraph current does not support recompute"
)
......@@ -7164,7 +7070,7 @@ class RecomputeOptimizer(Optimizer):
assert (
self._checkpoints is not None
), "You should call _set_checkpoints first"
if framework._non_static_mode():
if in_dygraph_mode():
raise NotImplementedError(
"DyGraph current does not support recompute"
)
......@@ -7248,7 +7154,7 @@ class LookaheadOptimizer:
def __init__(self, inner_optimizer, alpha=0.5, k=5):
if framework._non_static_mode():
if in_dygraph_mode():
raise Exception("In dygraph, don't support LookaheadOptimizer.")
assert inner_optimizer is not None, "inner optimizer can not be None"
assert (
......@@ -7427,7 +7333,7 @@ class GradientMergeOptimizer:
GRAD_MERGE_COND_NAME = "grad_merge_cond_name"
def __init__(self, inner_optimizer, k_steps=1, avg=True):
if framework._non_static_mode():
if in_dygraph_mode():
raise Exception(
"In dygraph, we don't support GradientMergeOptimizer."
"You can do Gradient merge by yourself with k-times forward + backward, "
......
......@@ -347,6 +347,7 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase):
)
def test_with_dataloader(self):
paddle.disable_static()
for device in self.devices:
paddle.set_device(device)
# data loader
......@@ -377,6 +378,7 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase):
if batch_id == 5:
break
paddle.enable_static()
if __name__ == '__main__':
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册